1235633Sdim//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file contains the X86 implementation of the TargetInstrInfo class.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#include "X86InstrInfo.h"
15193323Sed#include "X86.h"
16193323Sed#include "X86InstrBuilder.h"
17193323Sed#include "X86MachineFunctionInfo.h"
18193323Sed#include "X86Subtarget.h"
19193323Sed#include "X86TargetMachine.h"
20193323Sed#include "llvm/ADT/STLExtras.h"
21252723Sdim#include "llvm/CodeGen/LiveVariables.h"
22193323Sed#include "llvm/CodeGen/MachineConstantPool.h"
23245431Sdim#include "llvm/CodeGen/MachineDominators.h"
24193323Sed#include "llvm/CodeGen/MachineFrameInfo.h"
25193323Sed#include "llvm/CodeGen/MachineInstrBuilder.h"
26193323Sed#include "llvm/CodeGen/MachineRegisterInfo.h"
27263509Sdim#include "llvm/CodeGen/StackMaps.h"
28252723Sdim#include "llvm/IR/DerivedTypes.h"
29252723Sdim#include "llvm/IR/LLVMContext.h"
30235633Sdim#include "llvm/MC/MCAsmInfo.h"
31207618Srdivacky#include "llvm/MC/MCInst.h"
32193323Sed#include "llvm/Support/CommandLine.h"
33202375Srdivacky#include "llvm/Support/Debug.h"
34198090Srdivacky#include "llvm/Support/ErrorHandling.h"
35198090Srdivacky#include "llvm/Support/raw_ostream.h"
36193323Sed#include "llvm/Target/TargetOptions.h"
37199481Srdivacky#include <limits>
38199481Srdivacky
39263509Sdim#define GET_INSTRINFO_CTOR_DTOR
40224145Sdim#include "X86GenInstrInfo.inc"
41224145Sdim
42193323Sedusing namespace llvm;
43193323Sed
44198090Srdivackystatic cl::opt<bool>
45198090SrdivackyNoFusing("disable-spill-fusing",
46198090Srdivacky         cl::desc("Disable fusing of spill code into instructions"));
47198090Srdivackystatic cl::opt<bool>
48198090SrdivackyPrintFailedFusing("print-failed-fuse-candidates",
49198090Srdivacky                  cl::desc("Print instructions that the allocator wants to"
50198090Srdivacky                           " fuse, but the X86 backend currently can't"),
51198090Srdivacky                  cl::Hidden);
52198090Srdivackystatic cl::opt<bool>
53198090SrdivackyReMatPICStubLoad("remat-pic-stub-load",
54198090Srdivacky                 cl::desc("Re-materialize load from stub in PIC mode"),
55198090Srdivacky                 cl::init(false), cl::Hidden);
56193323Sed
// Flag word layout for the fold-table entries below. Each table entry packs
// the following fields into one value:
//   bits 0-3 : index of the operand that is folded/unfolded (TB_INDEX_*)
//   bit  4   : TB_NO_REVERSE  - omit the MemOp -> RegOp (unfold) mapping
//   bit  5   : TB_NO_FORWARD  - omit the RegOp -> MemOp (fold) mapping
//   bit  6   : TB_FOLDED_LOAD - the folded memory operand is a load
//   bit  7   : TB_FOLDED_STORE- the folded memory operand is a store
//   bits 8-15: minimum alignment (in bytes) required of the memory operand
enum {
  // Select which memory operand is being unfolded.
  // (stored in bits 0 - 3)
  TB_INDEX_0    = 0,
  TB_INDEX_1    = 1,
  TB_INDEX_2    = 2,
  TB_INDEX_3    = 3,
  TB_INDEX_MASK = 0xf,

  // Do not insert the reverse map (MemOp -> RegOp) into the table.
  // This may be needed because there is a many -> one mapping.
  TB_NO_REVERSE   = 1 << 4,

  // Do not insert the forward map (RegOp -> MemOp) into the table.
  // This is needed for Native Client, which prohibits branch
  // instructions from using a memory operand.
  TB_NO_FORWARD   = 1 << 5,

  TB_FOLDED_LOAD  = 1 << 6,
  TB_FOLDED_STORE = 1 << 7,

  // Minimum alignment required for load/store.
  // Used for RegOp->MemOp conversion.
  // (stored in bits 8 - 15)
  // Note: the raw byte count (16/32/64) is stored in the field, not a
  // log2-encoded value, so the 8-bit field caps out at TB_ALIGN_MASK.
  TB_ALIGN_SHIFT = 8,
  TB_ALIGN_NONE  =    0 << TB_ALIGN_SHIFT,
  TB_ALIGN_16    =   16 << TB_ALIGN_SHIFT,
  TB_ALIGN_32    =   32 << TB_ALIGN_SHIFT,
  TB_ALIGN_64    =   64 << TB_ALIGN_SHIFT,
  TB_ALIGN_MASK  = 0xff << TB_ALIGN_SHIFT
};
88226890Sdim
// One row of the opcode fold tables (OpTbl0/OpTbl1/... below): maps a
// register-operand instruction to its memory-operand equivalent, plus the
// packed TB_* flags describing how the fold is performed.
// NOTE: member order is load-bearing - the tables rely on aggregate
// initialization in { RegOp, MemOp, Flags } order.
struct X86OpTblEntry {
  uint16_t RegOp;   // Opcode of the register-form instruction.
  uint16_t MemOp;   // Opcode of the equivalent memory-form instruction.
  uint16_t Flags;   // TB_INDEX_* | TB_NO_* | TB_FOLDED_* | TB_ALIGN_*.
};
94235633Sdim
// Pin the vtable to this file.
// Deliberately-empty out-of-line definition of a virtual method: giving the
// class one non-inline virtual function anchors vtable/RTTI emission to this
// translation unit instead of duplicating it in every user of the header.
void X86InstrInfo::anchor() {}
97263509Sdim
98193323SedX86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
99224145Sdim  : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
100224145Sdim                     ? X86::ADJCALLSTACKDOWN64
101224145Sdim                     : X86::ADJCALLSTACKDOWN32),
102224145Sdim                    (tm.getSubtarget<X86Subtarget>().is64Bit()
103224145Sdim                     ? X86::ADJCALLSTACKUP64
104224145Sdim                     : X86::ADJCALLSTACKUP32)),
105263509Sdim    TM(tm), RI(tm) {
106218893Sdim
107235633Sdim  static const X86OpTblEntry OpTbl2Addr[] = {
108226890Sdim    { X86::ADC32ri,     X86::ADC32mi,    0 },
109226890Sdim    { X86::ADC32ri8,    X86::ADC32mi8,   0 },
110226890Sdim    { X86::ADC32rr,     X86::ADC32mr,    0 },
111226890Sdim    { X86::ADC64ri32,   X86::ADC64mi32,  0 },
112226890Sdim    { X86::ADC64ri8,    X86::ADC64mi8,   0 },
113226890Sdim    { X86::ADC64rr,     X86::ADC64mr,    0 },
114226890Sdim    { X86::ADD16ri,     X86::ADD16mi,    0 },
115226890Sdim    { X86::ADD16ri8,    X86::ADD16mi8,   0 },
116226890Sdim    { X86::ADD16ri_DB,  X86::ADD16mi,    TB_NO_REVERSE },
117226890Sdim    { X86::ADD16ri8_DB, X86::ADD16mi8,   TB_NO_REVERSE },
118226890Sdim    { X86::ADD16rr,     X86::ADD16mr,    0 },
119226890Sdim    { X86::ADD16rr_DB,  X86::ADD16mr,    TB_NO_REVERSE },
120226890Sdim    { X86::ADD32ri,     X86::ADD32mi,    0 },
121226890Sdim    { X86::ADD32ri8,    X86::ADD32mi8,   0 },
122226890Sdim    { X86::ADD32ri_DB,  X86::ADD32mi,    TB_NO_REVERSE },
123226890Sdim    { X86::ADD32ri8_DB, X86::ADD32mi8,   TB_NO_REVERSE },
124226890Sdim    { X86::ADD32rr,     X86::ADD32mr,    0 },
125226890Sdim    { X86::ADD32rr_DB,  X86::ADD32mr,    TB_NO_REVERSE },
126226890Sdim    { X86::ADD64ri32,   X86::ADD64mi32,  0 },
127226890Sdim    { X86::ADD64ri8,    X86::ADD64mi8,   0 },
128226890Sdim    { X86::ADD64ri32_DB,X86::ADD64mi32,  TB_NO_REVERSE },
129226890Sdim    { X86::ADD64ri8_DB, X86::ADD64mi8,   TB_NO_REVERSE },
130226890Sdim    { X86::ADD64rr,     X86::ADD64mr,    0 },
131226890Sdim    { X86::ADD64rr_DB,  X86::ADD64mr,    TB_NO_REVERSE },
132226890Sdim    { X86::ADD8ri,      X86::ADD8mi,     0 },
133226890Sdim    { X86::ADD8rr,      X86::ADD8mr,     0 },
134226890Sdim    { X86::AND16ri,     X86::AND16mi,    0 },
135226890Sdim    { X86::AND16ri8,    X86::AND16mi8,   0 },
136226890Sdim    { X86::AND16rr,     X86::AND16mr,    0 },
137226890Sdim    { X86::AND32ri,     X86::AND32mi,    0 },
138226890Sdim    { X86::AND32ri8,    X86::AND32mi8,   0 },
139226890Sdim    { X86::AND32rr,     X86::AND32mr,    0 },
140226890Sdim    { X86::AND64ri32,   X86::AND64mi32,  0 },
141226890Sdim    { X86::AND64ri8,    X86::AND64mi8,   0 },
142226890Sdim    { X86::AND64rr,     X86::AND64mr,    0 },
143226890Sdim    { X86::AND8ri,      X86::AND8mi,     0 },
144226890Sdim    { X86::AND8rr,      X86::AND8mr,     0 },
145226890Sdim    { X86::DEC16r,      X86::DEC16m,     0 },
146226890Sdim    { X86::DEC32r,      X86::DEC32m,     0 },
147226890Sdim    { X86::DEC64_16r,   X86::DEC64_16m,  0 },
148226890Sdim    { X86::DEC64_32r,   X86::DEC64_32m,  0 },
149226890Sdim    { X86::DEC64r,      X86::DEC64m,     0 },
150226890Sdim    { X86::DEC8r,       X86::DEC8m,      0 },
151226890Sdim    { X86::INC16r,      X86::INC16m,     0 },
152226890Sdim    { X86::INC32r,      X86::INC32m,     0 },
153226890Sdim    { X86::INC64_16r,   X86::INC64_16m,  0 },
154226890Sdim    { X86::INC64_32r,   X86::INC64_32m,  0 },
155226890Sdim    { X86::INC64r,      X86::INC64m,     0 },
156226890Sdim    { X86::INC8r,       X86::INC8m,      0 },
157226890Sdim    { X86::NEG16r,      X86::NEG16m,     0 },
158226890Sdim    { X86::NEG32r,      X86::NEG32m,     0 },
159226890Sdim    { X86::NEG64r,      X86::NEG64m,     0 },
160226890Sdim    { X86::NEG8r,       X86::NEG8m,      0 },
161226890Sdim    { X86::NOT16r,      X86::NOT16m,     0 },
162226890Sdim    { X86::NOT32r,      X86::NOT32m,     0 },
163226890Sdim    { X86::NOT64r,      X86::NOT64m,     0 },
164226890Sdim    { X86::NOT8r,       X86::NOT8m,      0 },
165226890Sdim    { X86::OR16ri,      X86::OR16mi,     0 },
166226890Sdim    { X86::OR16ri8,     X86::OR16mi8,    0 },
167226890Sdim    { X86::OR16rr,      X86::OR16mr,     0 },
168226890Sdim    { X86::OR32ri,      X86::OR32mi,     0 },
169226890Sdim    { X86::OR32ri8,     X86::OR32mi8,    0 },
170226890Sdim    { X86::OR32rr,      X86::OR32mr,     0 },
171226890Sdim    { X86::OR64ri32,    X86::OR64mi32,   0 },
172226890Sdim    { X86::OR64ri8,     X86::OR64mi8,    0 },
173226890Sdim    { X86::OR64rr,      X86::OR64mr,     0 },
174226890Sdim    { X86::OR8ri,       X86::OR8mi,      0 },
175226890Sdim    { X86::OR8rr,       X86::OR8mr,      0 },
176226890Sdim    { X86::ROL16r1,     X86::ROL16m1,    0 },
177226890Sdim    { X86::ROL16rCL,    X86::ROL16mCL,   0 },
178226890Sdim    { X86::ROL16ri,     X86::ROL16mi,    0 },
179226890Sdim    { X86::ROL32r1,     X86::ROL32m1,    0 },
180226890Sdim    { X86::ROL32rCL,    X86::ROL32mCL,   0 },
181226890Sdim    { X86::ROL32ri,     X86::ROL32mi,    0 },
182226890Sdim    { X86::ROL64r1,     X86::ROL64m1,    0 },
183226890Sdim    { X86::ROL64rCL,    X86::ROL64mCL,   0 },
184226890Sdim    { X86::ROL64ri,     X86::ROL64mi,    0 },
185226890Sdim    { X86::ROL8r1,      X86::ROL8m1,     0 },
186226890Sdim    { X86::ROL8rCL,     X86::ROL8mCL,    0 },
187226890Sdim    { X86::ROL8ri,      X86::ROL8mi,     0 },
188226890Sdim    { X86::ROR16r1,     X86::ROR16m1,    0 },
189226890Sdim    { X86::ROR16rCL,    X86::ROR16mCL,   0 },
190226890Sdim    { X86::ROR16ri,     X86::ROR16mi,    0 },
191226890Sdim    { X86::ROR32r1,     X86::ROR32m1,    0 },
192226890Sdim    { X86::ROR32rCL,    X86::ROR32mCL,   0 },
193226890Sdim    { X86::ROR32ri,     X86::ROR32mi,    0 },
194226890Sdim    { X86::ROR64r1,     X86::ROR64m1,    0 },
195226890Sdim    { X86::ROR64rCL,    X86::ROR64mCL,   0 },
196226890Sdim    { X86::ROR64ri,     X86::ROR64mi,    0 },
197226890Sdim    { X86::ROR8r1,      X86::ROR8m1,     0 },
198226890Sdim    { X86::ROR8rCL,     X86::ROR8mCL,    0 },
199226890Sdim    { X86::ROR8ri,      X86::ROR8mi,     0 },
200226890Sdim    { X86::SAR16r1,     X86::SAR16m1,    0 },
201226890Sdim    { X86::SAR16rCL,    X86::SAR16mCL,   0 },
202226890Sdim    { X86::SAR16ri,     X86::SAR16mi,    0 },
203226890Sdim    { X86::SAR32r1,     X86::SAR32m1,    0 },
204226890Sdim    { X86::SAR32rCL,    X86::SAR32mCL,   0 },
205226890Sdim    { X86::SAR32ri,     X86::SAR32mi,    0 },
206226890Sdim    { X86::SAR64r1,     X86::SAR64m1,    0 },
207226890Sdim    { X86::SAR64rCL,    X86::SAR64mCL,   0 },
208226890Sdim    { X86::SAR64ri,     X86::SAR64mi,    0 },
209226890Sdim    { X86::SAR8r1,      X86::SAR8m1,     0 },
210226890Sdim    { X86::SAR8rCL,     X86::SAR8mCL,    0 },
211226890Sdim    { X86::SAR8ri,      X86::SAR8mi,     0 },
212226890Sdim    { X86::SBB32ri,     X86::SBB32mi,    0 },
213226890Sdim    { X86::SBB32ri8,    X86::SBB32mi8,   0 },
214226890Sdim    { X86::SBB32rr,     X86::SBB32mr,    0 },
215226890Sdim    { X86::SBB64ri32,   X86::SBB64mi32,  0 },
216226890Sdim    { X86::SBB64ri8,    X86::SBB64mi8,   0 },
217226890Sdim    { X86::SBB64rr,     X86::SBB64mr,    0 },
218226890Sdim    { X86::SHL16rCL,    X86::SHL16mCL,   0 },
219226890Sdim    { X86::SHL16ri,     X86::SHL16mi,    0 },
220226890Sdim    { X86::SHL32rCL,    X86::SHL32mCL,   0 },
221226890Sdim    { X86::SHL32ri,     X86::SHL32mi,    0 },
222226890Sdim    { X86::SHL64rCL,    X86::SHL64mCL,   0 },
223226890Sdim    { X86::SHL64ri,     X86::SHL64mi,    0 },
224226890Sdim    { X86::SHL8rCL,     X86::SHL8mCL,    0 },
225226890Sdim    { X86::SHL8ri,      X86::SHL8mi,     0 },
226226890Sdim    { X86::SHLD16rrCL,  X86::SHLD16mrCL, 0 },
227226890Sdim    { X86::SHLD16rri8,  X86::SHLD16mri8, 0 },
228226890Sdim    { X86::SHLD32rrCL,  X86::SHLD32mrCL, 0 },
229226890Sdim    { X86::SHLD32rri8,  X86::SHLD32mri8, 0 },
230226890Sdim    { X86::SHLD64rrCL,  X86::SHLD64mrCL, 0 },
231226890Sdim    { X86::SHLD64rri8,  X86::SHLD64mri8, 0 },
232226890Sdim    { X86::SHR16r1,     X86::SHR16m1,    0 },
233226890Sdim    { X86::SHR16rCL,    X86::SHR16mCL,   0 },
234226890Sdim    { X86::SHR16ri,     X86::SHR16mi,    0 },
235226890Sdim    { X86::SHR32r1,     X86::SHR32m1,    0 },
236226890Sdim    { X86::SHR32rCL,    X86::SHR32mCL,   0 },
237226890Sdim    { X86::SHR32ri,     X86::SHR32mi,    0 },
238226890Sdim    { X86::SHR64r1,     X86::SHR64m1,    0 },
239226890Sdim    { X86::SHR64rCL,    X86::SHR64mCL,   0 },
240226890Sdim    { X86::SHR64ri,     X86::SHR64mi,    0 },
241226890Sdim    { X86::SHR8r1,      X86::SHR8m1,     0 },
242226890Sdim    { X86::SHR8rCL,     X86::SHR8mCL,    0 },
243226890Sdim    { X86::SHR8ri,      X86::SHR8mi,     0 },
244226890Sdim    { X86::SHRD16rrCL,  X86::SHRD16mrCL, 0 },
245226890Sdim    { X86::SHRD16rri8,  X86::SHRD16mri8, 0 },
246226890Sdim    { X86::SHRD32rrCL,  X86::SHRD32mrCL, 0 },
247226890Sdim    { X86::SHRD32rri8,  X86::SHRD32mri8, 0 },
248226890Sdim    { X86::SHRD64rrCL,  X86::SHRD64mrCL, 0 },
249226890Sdim    { X86::SHRD64rri8,  X86::SHRD64mri8, 0 },
250226890Sdim    { X86::SUB16ri,     X86::SUB16mi,    0 },
251226890Sdim    { X86::SUB16ri8,    X86::SUB16mi8,   0 },
252226890Sdim    { X86::SUB16rr,     X86::SUB16mr,    0 },
253226890Sdim    { X86::SUB32ri,     X86::SUB32mi,    0 },
254226890Sdim    { X86::SUB32ri8,    X86::SUB32mi8,   0 },
255226890Sdim    { X86::SUB32rr,     X86::SUB32mr,    0 },
256226890Sdim    { X86::SUB64ri32,   X86::SUB64mi32,  0 },
257226890Sdim    { X86::SUB64ri8,    X86::SUB64mi8,   0 },
258226890Sdim    { X86::SUB64rr,     X86::SUB64mr,    0 },
259226890Sdim    { X86::SUB8ri,      X86::SUB8mi,     0 },
260226890Sdim    { X86::SUB8rr,      X86::SUB8mr,     0 },
261226890Sdim    { X86::XOR16ri,     X86::XOR16mi,    0 },
262226890Sdim    { X86::XOR16ri8,    X86::XOR16mi8,   0 },
263226890Sdim    { X86::XOR16rr,     X86::XOR16mr,    0 },
264226890Sdim    { X86::XOR32ri,     X86::XOR32mi,    0 },
265226890Sdim    { X86::XOR32ri8,    X86::XOR32mi8,   0 },
266226890Sdim    { X86::XOR32rr,     X86::XOR32mr,    0 },
267226890Sdim    { X86::XOR64ri32,   X86::XOR64mi32,  0 },
268226890Sdim    { X86::XOR64ri8,    X86::XOR64mi8,   0 },
269226890Sdim    { X86::XOR64rr,     X86::XOR64mr,    0 },
270226890Sdim    { X86::XOR8ri,      X86::XOR8mi,     0 },
271226890Sdim    { X86::XOR8rr,      X86::XOR8mr,     0 }
272193323Sed  };
273193323Sed
274193323Sed  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
275235633Sdim    unsigned RegOp = OpTbl2Addr[i].RegOp;
276235633Sdim    unsigned MemOp = OpTbl2Addr[i].MemOp;
277235633Sdim    unsigned Flags = OpTbl2Addr[i].Flags;
278226890Sdim    AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
279226890Sdim                  RegOp, MemOp,
280226890Sdim                  // Index 0, folded load and store, no alignment requirement.
281226890Sdim                  Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
282193323Sed  }
283193323Sed
284235633Sdim  static const X86OpTblEntry OpTbl0[] = {
285226890Sdim    { X86::BT16ri8,     X86::BT16mi8,       TB_FOLDED_LOAD },
286226890Sdim    { X86::BT32ri8,     X86::BT32mi8,       TB_FOLDED_LOAD },
287226890Sdim    { X86::BT64ri8,     X86::BT64mi8,       TB_FOLDED_LOAD },
288226890Sdim    { X86::CALL32r,     X86::CALL32m,       TB_FOLDED_LOAD },
289226890Sdim    { X86::CALL64r,     X86::CALL64m,       TB_FOLDED_LOAD },
290226890Sdim    { X86::CMP16ri,     X86::CMP16mi,       TB_FOLDED_LOAD },
291226890Sdim    { X86::CMP16ri8,    X86::CMP16mi8,      TB_FOLDED_LOAD },
292226890Sdim    { X86::CMP16rr,     X86::CMP16mr,       TB_FOLDED_LOAD },
293226890Sdim    { X86::CMP32ri,     X86::CMP32mi,       TB_FOLDED_LOAD },
294226890Sdim    { X86::CMP32ri8,    X86::CMP32mi8,      TB_FOLDED_LOAD },
295226890Sdim    { X86::CMP32rr,     X86::CMP32mr,       TB_FOLDED_LOAD },
296226890Sdim    { X86::CMP64ri32,   X86::CMP64mi32,     TB_FOLDED_LOAD },
297226890Sdim    { X86::CMP64ri8,    X86::CMP64mi8,      TB_FOLDED_LOAD },
298226890Sdim    { X86::CMP64rr,     X86::CMP64mr,       TB_FOLDED_LOAD },
299226890Sdim    { X86::CMP8ri,      X86::CMP8mi,        TB_FOLDED_LOAD },
300226890Sdim    { X86::CMP8rr,      X86::CMP8mr,        TB_FOLDED_LOAD },
301226890Sdim    { X86::DIV16r,      X86::DIV16m,        TB_FOLDED_LOAD },
302226890Sdim    { X86::DIV32r,      X86::DIV32m,        TB_FOLDED_LOAD },
303226890Sdim    { X86::DIV64r,      X86::DIV64m,        TB_FOLDED_LOAD },
304226890Sdim    { X86::DIV8r,       X86::DIV8m,         TB_FOLDED_LOAD },
305252723Sdim    { X86::EXTRACTPSrr, X86::EXTRACTPSmr,   TB_FOLDED_STORE },
306226890Sdim    { X86::IDIV16r,     X86::IDIV16m,       TB_FOLDED_LOAD },
307226890Sdim    { X86::IDIV32r,     X86::IDIV32m,       TB_FOLDED_LOAD },
308226890Sdim    { X86::IDIV64r,     X86::IDIV64m,       TB_FOLDED_LOAD },
309226890Sdim    { X86::IDIV8r,      X86::IDIV8m,        TB_FOLDED_LOAD },
310226890Sdim    { X86::IMUL16r,     X86::IMUL16m,       TB_FOLDED_LOAD },
311226890Sdim    { X86::IMUL32r,     X86::IMUL32m,       TB_FOLDED_LOAD },
312226890Sdim    { X86::IMUL64r,     X86::IMUL64m,       TB_FOLDED_LOAD },
313226890Sdim    { X86::IMUL8r,      X86::IMUL8m,        TB_FOLDED_LOAD },
314226890Sdim    { X86::JMP32r,      X86::JMP32m,        TB_FOLDED_LOAD },
315226890Sdim    { X86::JMP64r,      X86::JMP64m,        TB_FOLDED_LOAD },
316226890Sdim    { X86::MOV16ri,     X86::MOV16mi,       TB_FOLDED_STORE },
317226890Sdim    { X86::MOV16rr,     X86::MOV16mr,       TB_FOLDED_STORE },
318226890Sdim    { X86::MOV32ri,     X86::MOV32mi,       TB_FOLDED_STORE },
319226890Sdim    { X86::MOV32rr,     X86::MOV32mr,       TB_FOLDED_STORE },
320226890Sdim    { X86::MOV64ri32,   X86::MOV64mi32,     TB_FOLDED_STORE },
321226890Sdim    { X86::MOV64rr,     X86::MOV64mr,       TB_FOLDED_STORE },
322226890Sdim    { X86::MOV8ri,      X86::MOV8mi,        TB_FOLDED_STORE },
323226890Sdim    { X86::MOV8rr,      X86::MOV8mr,        TB_FOLDED_STORE },
324226890Sdim    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
325226890Sdim    { X86::MOVAPDrr,    X86::MOVAPDmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
326226890Sdim    { X86::MOVAPSrr,    X86::MOVAPSmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
327226890Sdim    { X86::MOVDQArr,    X86::MOVDQAmr,      TB_FOLDED_STORE | TB_ALIGN_16 },
328226890Sdim    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr,   TB_FOLDED_STORE },
329226890Sdim    { X86::MOVPQIto64rr,X86::MOVPQI2QImr,   TB_FOLDED_STORE },
330226890Sdim    { X86::MOVSDto64rr, X86::MOVSDto64mr,   TB_FOLDED_STORE },
331226890Sdim    { X86::MOVSS2DIrr,  X86::MOVSS2DImr,    TB_FOLDED_STORE },
332226890Sdim    { X86::MOVUPDrr,    X86::MOVUPDmr,      TB_FOLDED_STORE },
333226890Sdim    { X86::MOVUPSrr,    X86::MOVUPSmr,      TB_FOLDED_STORE },
334226890Sdim    { X86::MUL16r,      X86::MUL16m,        TB_FOLDED_LOAD },
335226890Sdim    { X86::MUL32r,      X86::MUL32m,        TB_FOLDED_LOAD },
336226890Sdim    { X86::MUL64r,      X86::MUL64m,        TB_FOLDED_LOAD },
337226890Sdim    { X86::MUL8r,       X86::MUL8m,         TB_FOLDED_LOAD },
338226890Sdim    { X86::SETAEr,      X86::SETAEm,        TB_FOLDED_STORE },
339226890Sdim    { X86::SETAr,       X86::SETAm,         TB_FOLDED_STORE },
340226890Sdim    { X86::SETBEr,      X86::SETBEm,        TB_FOLDED_STORE },
341226890Sdim    { X86::SETBr,       X86::SETBm,         TB_FOLDED_STORE },
342226890Sdim    { X86::SETEr,       X86::SETEm,         TB_FOLDED_STORE },
343226890Sdim    { X86::SETGEr,      X86::SETGEm,        TB_FOLDED_STORE },
344226890Sdim    { X86::SETGr,       X86::SETGm,         TB_FOLDED_STORE },
345226890Sdim    { X86::SETLEr,      X86::SETLEm,        TB_FOLDED_STORE },
346226890Sdim    { X86::SETLr,       X86::SETLm,         TB_FOLDED_STORE },
347226890Sdim    { X86::SETNEr,      X86::SETNEm,        TB_FOLDED_STORE },
348226890Sdim    { X86::SETNOr,      X86::SETNOm,        TB_FOLDED_STORE },
349226890Sdim    { X86::SETNPr,      X86::SETNPm,        TB_FOLDED_STORE },
350226890Sdim    { X86::SETNSr,      X86::SETNSm,        TB_FOLDED_STORE },
351226890Sdim    { X86::SETOr,       X86::SETOm,         TB_FOLDED_STORE },
352226890Sdim    { X86::SETPr,       X86::SETPm,         TB_FOLDED_STORE },
353226890Sdim    { X86::SETSr,       X86::SETSm,         TB_FOLDED_STORE },
354226890Sdim    { X86::TAILJMPr,    X86::TAILJMPm,      TB_FOLDED_LOAD },
355226890Sdim    { X86::TAILJMPr64,  X86::TAILJMPm64,    TB_FOLDED_LOAD },
356226890Sdim    { X86::TEST16ri,    X86::TEST16mi,      TB_FOLDED_LOAD },
357226890Sdim    { X86::TEST32ri,    X86::TEST32mi,      TB_FOLDED_LOAD },
358226890Sdim    { X86::TEST64ri32,  X86::TEST64mi32,    TB_FOLDED_LOAD },
359226890Sdim    { X86::TEST8ri,     X86::TEST8mi,       TB_FOLDED_LOAD },
360226890Sdim    // AVX 128-bit versions of foldable instructions
361252723Sdim    { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr,  TB_FOLDED_STORE  },
362235633Sdim    { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
363226890Sdim    { X86::VMOVAPDrr,   X86::VMOVAPDmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
364226890Sdim    { X86::VMOVAPSrr,   X86::VMOVAPSmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
365226890Sdim    { X86::VMOVDQArr,   X86::VMOVDQAmr,     TB_FOLDED_STORE | TB_ALIGN_16 },
366226890Sdim    { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr,  TB_FOLDED_STORE },
367226890Sdim    { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
368226890Sdim    { X86::VMOVSDto64rr,X86::VMOVSDto64mr,  TB_FOLDED_STORE },
369226890Sdim    { X86::VMOVSS2DIrr, X86::VMOVSS2DImr,   TB_FOLDED_STORE },
370226890Sdim    { X86::VMOVUPDrr,   X86::VMOVUPDmr,     TB_FOLDED_STORE },
371226890Sdim    { X86::VMOVUPSrr,   X86::VMOVUPSmr,     TB_FOLDED_STORE },
372226890Sdim    // AVX 256-bit foldable instructions
373235633Sdim    { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
374226890Sdim    { X86::VMOVAPDYrr,  X86::VMOVAPDYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
375226890Sdim    { X86::VMOVAPSYrr,  X86::VMOVAPSYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
376226890Sdim    { X86::VMOVDQAYrr,  X86::VMOVDQAYmr,    TB_FOLDED_STORE | TB_ALIGN_32 },
377226890Sdim    { X86::VMOVUPDYrr,  X86::VMOVUPDYmr,    TB_FOLDED_STORE },
378263509Sdim    { X86::VMOVUPSYrr,  X86::VMOVUPSYmr,    TB_FOLDED_STORE },
379263509Sdim    // AVX-512 foldable instructions
380263509Sdim    { X86::VMOVPDI2DIZrr,X86::VMOVPDI2DIZmr,  TB_FOLDED_STORE }
381193323Sed  };
382193323Sed
383193323Sed  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
384235633Sdim    unsigned RegOp      = OpTbl0[i].RegOp;
385235633Sdim    unsigned MemOp      = OpTbl0[i].MemOp;
386235633Sdim    unsigned Flags      = OpTbl0[i].Flags;
387226890Sdim    AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
388226890Sdim                  RegOp, MemOp, TB_INDEX_0 | Flags);
389193323Sed  }
390193323Sed
391235633Sdim  static const X86OpTblEntry OpTbl1[] = {
392226890Sdim    { X86::CMP16rr,         X86::CMP16rm,             0 },
393226890Sdim    { X86::CMP32rr,         X86::CMP32rm,             0 },
394226890Sdim    { X86::CMP64rr,         X86::CMP64rm,             0 },
395226890Sdim    { X86::CMP8rr,          X86::CMP8rm,              0 },
396226890Sdim    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm,          0 },
397226890Sdim    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm,        0 },
398226890Sdim    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm,          0 },
399226890Sdim    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm,        0 },
400226890Sdim    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm,          0 },
401226890Sdim    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm,          0 },
402226890Sdim    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm,       0 },
403226890Sdim    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm,         0 },
404226890Sdim    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm,       0 },
405226890Sdim    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm,         0 },
406226890Sdim    { X86::IMUL16rri,       X86::IMUL16rmi,           0 },
407226890Sdim    { X86::IMUL16rri8,      X86::IMUL16rmi8,          0 },
408226890Sdim    { X86::IMUL32rri,       X86::IMUL32rmi,           0 },
409226890Sdim    { X86::IMUL32rri8,      X86::IMUL32rmi8,          0 },
410226890Sdim    { X86::IMUL64rri32,     X86::IMUL64rmi32,         0 },
411226890Sdim    { X86::IMUL64rri8,      X86::IMUL64rmi8,          0 },
412226890Sdim    { X86::Int_COMISDrr,    X86::Int_COMISDrm,        0 },
413226890Sdim    { X86::Int_COMISSrr,    X86::Int_COMISSrm,        0 },
414226890Sdim    { X86::CVTSD2SI64rr,    X86::CVTSD2SI64rm,        0 },
415226890Sdim    { X86::CVTSD2SIrr,      X86::CVTSD2SIrm,          0 },
416245431Sdim    { X86::CVTSS2SI64rr,    X86::CVTSS2SI64rm,        0 },
417245431Sdim    { X86::CVTSS2SIrr,      X86::CVTSS2SIrm,          0 },
418226890Sdim    { X86::CVTTPD2DQrr,     X86::CVTTPD2DQrm,         TB_ALIGN_16 },
419226890Sdim    { X86::CVTTPS2DQrr,     X86::CVTTPS2DQrm,         TB_ALIGN_16 },
420226890Sdim    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm,  0 },
421226890Sdim    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm,     0 },
422226890Sdim    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm,  0 },
423226890Sdim    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm,     0 },
424226890Sdim    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm,       0 },
425226890Sdim    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm,       0 },
426226890Sdim    { X86::MOV16rr,         X86::MOV16rm,             0 },
427226890Sdim    { X86::MOV32rr,         X86::MOV32rm,             0 },
428226890Sdim    { X86::MOV64rr,         X86::MOV64rm,             0 },
429226890Sdim    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm,         0 },
430226890Sdim    { X86::MOV64toSDrr,     X86::MOV64toSDrm,         0 },
431226890Sdim    { X86::MOV8rr,          X86::MOV8rm,              0 },
432226890Sdim    { X86::MOVAPDrr,        X86::MOVAPDrm,            TB_ALIGN_16 },
433226890Sdim    { X86::MOVAPSrr,        X86::MOVAPSrm,            TB_ALIGN_16 },
434226890Sdim    { X86::MOVDDUPrr,       X86::MOVDDUPrm,           0 },
435226890Sdim    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm,         0 },
436226890Sdim    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm,          0 },
437226890Sdim    { X86::MOVDQArr,        X86::MOVDQArm,            TB_ALIGN_16 },
438226890Sdim    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm,          TB_ALIGN_16 },
439226890Sdim    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm,          TB_ALIGN_16 },
440226890Sdim    { X86::MOVSX16rr8,      X86::MOVSX16rm8,          0 },
441226890Sdim    { X86::MOVSX32rr16,     X86::MOVSX32rm16,         0 },
442226890Sdim    { X86::MOVSX32rr8,      X86::MOVSX32rm8,          0 },
443226890Sdim    { X86::MOVSX64rr16,     X86::MOVSX64rm16,         0 },
444226890Sdim    { X86::MOVSX64rr32,     X86::MOVSX64rm32,         0 },
445226890Sdim    { X86::MOVSX64rr8,      X86::MOVSX64rm8,          0 },
446226890Sdim    { X86::MOVUPDrr,        X86::MOVUPDrm,            TB_ALIGN_16 },
447226890Sdim    { X86::MOVUPSrr,        X86::MOVUPSrm,            0 },
448226890Sdim    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm,        0 },
449226890Sdim    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm,     TB_ALIGN_16 },
450226890Sdim    { X86::MOVZX16rr8,      X86::MOVZX16rm8,          0 },
451226890Sdim    { X86::MOVZX32rr16,     X86::MOVZX32rm16,         0 },
452226890Sdim    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8,   0 },
453226890Sdim    { X86::MOVZX32rr8,      X86::MOVZX32rm8,          0 },
454235633Sdim    { X86::PABSBrr128,      X86::PABSBrm128,          TB_ALIGN_16 },
455235633Sdim    { X86::PABSDrr128,      X86::PABSDrm128,          TB_ALIGN_16 },
456235633Sdim    { X86::PABSWrr128,      X86::PABSWrm128,          TB_ALIGN_16 },
457226890Sdim    { X86::PSHUFDri,        X86::PSHUFDmi,            TB_ALIGN_16 },
458226890Sdim    { X86::PSHUFHWri,       X86::PSHUFHWmi,           TB_ALIGN_16 },
459226890Sdim    { X86::PSHUFLWri,       X86::PSHUFLWmi,           TB_ALIGN_16 },
460226890Sdim    { X86::RCPPSr,          X86::RCPPSm,              TB_ALIGN_16 },
461226890Sdim    { X86::RCPPSr_Int,      X86::RCPPSm_Int,          TB_ALIGN_16 },
462226890Sdim    { X86::RSQRTPSr,        X86::RSQRTPSm,            TB_ALIGN_16 },
463226890Sdim    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int,        TB_ALIGN_16 },
464226890Sdim    { X86::RSQRTSSr,        X86::RSQRTSSm,            0 },
465226890Sdim    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int,        0 },
466226890Sdim    { X86::SQRTPDr,         X86::SQRTPDm,             TB_ALIGN_16 },
467226890Sdim    { X86::SQRTPSr,         X86::SQRTPSm,             TB_ALIGN_16 },
468226890Sdim    { X86::SQRTSDr,         X86::SQRTSDm,             0 },
469226890Sdim    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int,         0 },
470226890Sdim    { X86::SQRTSSr,         X86::SQRTSSm,             0 },
471226890Sdim    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int,         0 },
472226890Sdim    { X86::TEST16rr,        X86::TEST16rm,            0 },
473226890Sdim    { X86::TEST32rr,        X86::TEST32rm,            0 },
474226890Sdim    { X86::TEST64rr,        X86::TEST64rm,            0 },
475226890Sdim    { X86::TEST8rr,         X86::TEST8rm,             0 },
476193323Sed    // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
477226890Sdim    { X86::UCOMISDrr,       X86::UCOMISDrm,           0 },
478226890Sdim    { X86::UCOMISSrr,       X86::UCOMISSrm,           0 },
479226890Sdim    // AVX 128-bit versions of foldable instructions
480226890Sdim    { X86::Int_VCOMISDrr,   X86::Int_VCOMISDrm,       0 },
481226890Sdim    { X86::Int_VCOMISSrr,   X86::Int_VCOMISSrm,       0 },
482226890Sdim    { X86::Int_VUCOMISDrr,  X86::Int_VUCOMISDrm,      0 },
483226890Sdim    { X86::Int_VUCOMISSrr,  X86::Int_VUCOMISSrm,      0 },
484245431Sdim    { X86::VCVTTSD2SI64rr,  X86::VCVTTSD2SI64rm,      0 },
485245431Sdim    { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
486245431Sdim    { X86::VCVTTSD2SIrr,    X86::VCVTTSD2SIrm,        0 },
487245431Sdim    { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm,    0 },
488245431Sdim    { X86::VCVTTSS2SI64rr,  X86::VCVTTSS2SI64rm,      0 },
489245431Sdim    { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
490245431Sdim    { X86::VCVTTSS2SIrr,    X86::VCVTTSS2SIrm,        0 },
491245431Sdim    { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm,    0 },
492245431Sdim    { X86::VCVTSD2SI64rr,   X86::VCVTSD2SI64rm,       0 },
493245431Sdim    { X86::VCVTSD2SIrr,     X86::VCVTSD2SIrm,         0 },
494245431Sdim    { X86::VCVTSS2SI64rr,   X86::VCVTSS2SI64rm,       0 },
495245431Sdim    { X86::VCVTSS2SIrr,     X86::VCVTSS2SIrm,         0 },
496226890Sdim    { X86::VMOV64toPQIrr,   X86::VMOVQI2PQIrm,        0 },
497226890Sdim    { X86::VMOV64toSDrr,    X86::VMOV64toSDrm,        0 },
498226890Sdim    { X86::VMOVAPDrr,       X86::VMOVAPDrm,           TB_ALIGN_16 },
499226890Sdim    { X86::VMOVAPSrr,       X86::VMOVAPSrm,           TB_ALIGN_16 },
500226890Sdim    { X86::VMOVDDUPrr,      X86::VMOVDDUPrm,          0 },
501226890Sdim    { X86::VMOVDI2PDIrr,    X86::VMOVDI2PDIrm,        0 },
502226890Sdim    { X86::VMOVDI2SSrr,     X86::VMOVDI2SSrm,         0 },
503226890Sdim    { X86::VMOVDQArr,       X86::VMOVDQArm,           TB_ALIGN_16 },
504226890Sdim    { X86::VMOVSLDUPrr,     X86::VMOVSLDUPrm,         TB_ALIGN_16 },
505226890Sdim    { X86::VMOVSHDUPrr,     X86::VMOVSHDUPrm,         TB_ALIGN_16 },
506252723Sdim    { X86::VMOVUPDrr,       X86::VMOVUPDrm,           0 },
507226890Sdim    { X86::VMOVUPSrr,       X86::VMOVUPSrm,           0 },
508226890Sdim    { X86::VMOVZQI2PQIrr,   X86::VMOVZQI2PQIrm,       0 },
509226890Sdim    { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm,    TB_ALIGN_16 },
510252723Sdim    { X86::VPABSBrr128,     X86::VPABSBrm128,         0 },
511252723Sdim    { X86::VPABSDrr128,     X86::VPABSDrm128,         0 },
512252723Sdim    { X86::VPABSWrr128,     X86::VPABSWrm128,         0 },
513252723Sdim    { X86::VPERMILPDri,     X86::VPERMILPDmi,         0 },
514252723Sdim    { X86::VPERMILPSri,     X86::VPERMILPSmi,         0 },
515252723Sdim    { X86::VPSHUFDri,       X86::VPSHUFDmi,           0 },
516252723Sdim    { X86::VPSHUFHWri,      X86::VPSHUFHWmi,          0 },
517252723Sdim    { X86::VPSHUFLWri,      X86::VPSHUFLWmi,          0 },
518252723Sdim    { X86::VRCPPSr,         X86::VRCPPSm,             0 },
519252723Sdim    { X86::VRCPPSr_Int,     X86::VRCPPSm_Int,         0 },
520252723Sdim    { X86::VRSQRTPSr,       X86::VRSQRTPSm,           0 },
521252723Sdim    { X86::VRSQRTPSr_Int,   X86::VRSQRTPSm_Int,       0 },
522252723Sdim    { X86::VSQRTPDr,        X86::VSQRTPDm,            0 },
523252723Sdim    { X86::VSQRTPSr,        X86::VSQRTPSm,            0 },
524226890Sdim    { X86::VUCOMISDrr,      X86::VUCOMISDrm,          0 },
525226890Sdim    { X86::VUCOMISSrr,      X86::VUCOMISSrm,          0 },
526245431Sdim    { X86::VBROADCASTSSrr,  X86::VBROADCASTSSrm,      TB_NO_REVERSE },
527245431Sdim
528226890Sdim    // AVX 256-bit foldable instructions
529226890Sdim    { X86::VMOVAPDYrr,      X86::VMOVAPDYrm,          TB_ALIGN_32 },
530226890Sdim    { X86::VMOVAPSYrr,      X86::VMOVAPSYrm,          TB_ALIGN_32 },
531235633Sdim    { X86::VMOVDQAYrr,      X86::VMOVDQAYrm,          TB_ALIGN_32 },
532226890Sdim    { X86::VMOVUPDYrr,      X86::VMOVUPDYrm,          0 },
533235633Sdim    { X86::VMOVUPSYrr,      X86::VMOVUPSYrm,          0 },
534252723Sdim    { X86::VPERMILPDYri,    X86::VPERMILPDYmi,        0 },
535252723Sdim    { X86::VPERMILPSYri,    X86::VPERMILPSYmi,        0 },
536245431Sdim
537235633Sdim    // AVX2 foldable instructions
538252723Sdim    { X86::VPABSBrr256,     X86::VPABSBrm256,         0 },
539252723Sdim    { X86::VPABSDrr256,     X86::VPABSDrm256,         0 },
540252723Sdim    { X86::VPABSWrr256,     X86::VPABSWrm256,         0 },
541252723Sdim    { X86::VPSHUFDYri,      X86::VPSHUFDYmi,          0 },
542252723Sdim    { X86::VPSHUFHWYri,     X86::VPSHUFHWYmi,         0 },
543252723Sdim    { X86::VPSHUFLWYri,     X86::VPSHUFLWYmi,         0 },
544252723Sdim    { X86::VRCPPSYr,        X86::VRCPPSYm,            0 },
545252723Sdim    { X86::VRCPPSYr_Int,    X86::VRCPPSYm_Int,        0 },
546252723Sdim    { X86::VRSQRTPSYr,      X86::VRSQRTPSYm,          0 },
547252723Sdim    { X86::VSQRTPDYr,       X86::VSQRTPDYm,           0 },
548252723Sdim    { X86::VSQRTPSYr,       X86::VSQRTPSYm,           0 },
549245431Sdim    { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm,     TB_NO_REVERSE },
550245431Sdim    { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm,     TB_NO_REVERSE },
551245431Sdim
552263509Sdim    // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions
553252723Sdim    { X86::BEXTR32rr,       X86::BEXTR32rm,           0 },
554252723Sdim    { X86::BEXTR64rr,       X86::BEXTR64rm,           0 },
555263509Sdim    { X86::BEXTRI32ri,      X86::BEXTRI32mi,          0 },
556263509Sdim    { X86::BEXTRI64ri,      X86::BEXTRI64mi,          0 },
557263509Sdim    { X86::BLCFILL32rr,     X86::BLCFILL32rm,         0 },
558263509Sdim    { X86::BLCFILL64rr,     X86::BLCFILL64rm,         0 },
559263509Sdim    { X86::BLCI32rr,        X86::BLCI32rm,            0 },
560263509Sdim    { X86::BLCI64rr,        X86::BLCI64rm,            0 },
561263509Sdim    { X86::BLCIC32rr,       X86::BLCIC32rm,           0 },
562263509Sdim    { X86::BLCIC64rr,       X86::BLCIC64rm,           0 },
563263509Sdim    { X86::BLCMSK32rr,      X86::BLCMSK32rm,          0 },
564263509Sdim    { X86::BLCMSK64rr,      X86::BLCMSK64rm,          0 },
565263509Sdim    { X86::BLCS32rr,        X86::BLCS32rm,            0 },
566263509Sdim    { X86::BLCS64rr,        X86::BLCS64rm,            0 },
567263509Sdim    { X86::BLSFILL32rr,     X86::BLSFILL32rm,         0 },
568263509Sdim    { X86::BLSFILL64rr,     X86::BLSFILL64rm,         0 },
569252723Sdim    { X86::BLSI32rr,        X86::BLSI32rm,            0 },
570252723Sdim    { X86::BLSI64rr,        X86::BLSI64rm,            0 },
571263509Sdim    { X86::BLSIC32rr,       X86::BLSIC32rm,           0 },
572263509Sdim    { X86::BLSIC64rr,       X86::BLSIC64rm,           0 },
573252723Sdim    { X86::BLSMSK32rr,      X86::BLSMSK32rm,          0 },
574252723Sdim    { X86::BLSMSK64rr,      X86::BLSMSK64rm,          0 },
575252723Sdim    { X86::BLSR32rr,        X86::BLSR32rm,            0 },
576252723Sdim    { X86::BLSR64rr,        X86::BLSR64rm,            0 },
577252723Sdim    { X86::BZHI32rr,        X86::BZHI32rm,            0 },
578252723Sdim    { X86::BZHI64rr,        X86::BZHI64rm,            0 },
579252723Sdim    { X86::LZCNT16rr,       X86::LZCNT16rm,           0 },
580252723Sdim    { X86::LZCNT32rr,       X86::LZCNT32rm,           0 },
581252723Sdim    { X86::LZCNT64rr,       X86::LZCNT64rm,           0 },
582252723Sdim    { X86::POPCNT16rr,      X86::POPCNT16rm,          0 },
583252723Sdim    { X86::POPCNT32rr,      X86::POPCNT32rm,          0 },
584252723Sdim    { X86::POPCNT64rr,      X86::POPCNT64rm,          0 },
585245431Sdim    { X86::RORX32ri,        X86::RORX32mi,            0 },
586245431Sdim    { X86::RORX64ri,        X86::RORX64mi,            0 },
587245431Sdim    { X86::SARX32rr,        X86::SARX32rm,            0 },
588245431Sdim    { X86::SARX64rr,        X86::SARX64rm,            0 },
589245431Sdim    { X86::SHRX32rr,        X86::SHRX32rm,            0 },
590245431Sdim    { X86::SHRX64rr,        X86::SHRX64rm,            0 },
591245431Sdim    { X86::SHLX32rr,        X86::SHLX32rm,            0 },
592245431Sdim    { X86::SHLX64rr,        X86::SHLX64rm,            0 },
593263509Sdim    { X86::T1MSKC32rr,      X86::T1MSKC32rm,          0 },
594263509Sdim    { X86::T1MSKC64rr,      X86::T1MSKC64rm,          0 },
595252723Sdim    { X86::TZCNT16rr,       X86::TZCNT16rm,           0 },
596252723Sdim    { X86::TZCNT32rr,       X86::TZCNT32rm,           0 },
597252723Sdim    { X86::TZCNT64rr,       X86::TZCNT64rm,           0 },
598263509Sdim    { X86::TZMSK32rr,       X86::TZMSK32rm,           0 },
599263509Sdim    { X86::TZMSK64rr,       X86::TZMSK64rm,           0 },
600263509Sdim
601263509Sdim    // AVX-512 foldable instructions
602263509Sdim    { X86::VMOV64toPQIZrr,  X86::VMOVQI2PQIZrm,       0 },
603263509Sdim    { X86::VMOVDI2SSZrr,    X86::VMOVDI2SSZrm,        0 },
604263509Sdim    { X86::VMOVDQA32rr,     X86::VMOVDQA32rm,         TB_ALIGN_64 },
605263509Sdim    { X86::VMOVDQA64rr,     X86::VMOVDQA64rm,         TB_ALIGN_64 },
606263509Sdim    { X86::VMOVDQU32rr,     X86::VMOVDQU32rm,         0 },
607263509Sdim    { X86::VMOVDQU64rr,     X86::VMOVDQU64rm,         0 },
608263509Sdim
609263509Sdim    // AES foldable instructions
610263509Sdim    { X86::AESIMCrr,              X86::AESIMCrm,              TB_ALIGN_16 },
611263509Sdim    { X86::AESKEYGENASSIST128rr,  X86::AESKEYGENASSIST128rm,  TB_ALIGN_16 },
612263509Sdim    { X86::VAESIMCrr,             X86::VAESIMCrm,             TB_ALIGN_16 },
613263509Sdim    { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, TB_ALIGN_16 },
614193323Sed  };
615193323Sed
616193323Sed  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
617235633Sdim    unsigned RegOp = OpTbl1[i].RegOp;
618235633Sdim    unsigned MemOp = OpTbl1[i].MemOp;
619235633Sdim    unsigned Flags = OpTbl1[i].Flags;
620226890Sdim    AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
621226890Sdim                  RegOp, MemOp,
622226890Sdim                  // Index 1, folded load
623226890Sdim                  Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
624193323Sed  }
625193323Sed
626235633Sdim  static const X86OpTblEntry OpTbl2[] = {
627226890Sdim    { X86::ADC32rr,         X86::ADC32rm,       0 },
628226890Sdim    { X86::ADC64rr,         X86::ADC64rm,       0 },
629226890Sdim    { X86::ADD16rr,         X86::ADD16rm,       0 },
630226890Sdim    { X86::ADD16rr_DB,      X86::ADD16rm,       TB_NO_REVERSE },
631226890Sdim    { X86::ADD32rr,         X86::ADD32rm,       0 },
632226890Sdim    { X86::ADD32rr_DB,      X86::ADD32rm,       TB_NO_REVERSE },
633226890Sdim    { X86::ADD64rr,         X86::ADD64rm,       0 },
634226890Sdim    { X86::ADD64rr_DB,      X86::ADD64rm,       TB_NO_REVERSE },
635226890Sdim    { X86::ADD8rr,          X86::ADD8rm,        0 },
636226890Sdim    { X86::ADDPDrr,         X86::ADDPDrm,       TB_ALIGN_16 },
637226890Sdim    { X86::ADDPSrr,         X86::ADDPSrm,       TB_ALIGN_16 },
638226890Sdim    { X86::ADDSDrr,         X86::ADDSDrm,       0 },
639226890Sdim    { X86::ADDSSrr,         X86::ADDSSrm,       0 },
640226890Sdim    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm,    TB_ALIGN_16 },
641226890Sdim    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm,    TB_ALIGN_16 },
642226890Sdim    { X86::AND16rr,         X86::AND16rm,       0 },
643226890Sdim    { X86::AND32rr,         X86::AND32rm,       0 },
644226890Sdim    { X86::AND64rr,         X86::AND64rm,       0 },
645226890Sdim    { X86::AND8rr,          X86::AND8rm,        0 },
646226890Sdim    { X86::ANDNPDrr,        X86::ANDNPDrm,      TB_ALIGN_16 },
647226890Sdim    { X86::ANDNPSrr,        X86::ANDNPSrm,      TB_ALIGN_16 },
648226890Sdim    { X86::ANDPDrr,         X86::ANDPDrm,       TB_ALIGN_16 },
649226890Sdim    { X86::ANDPSrr,         X86::ANDPSrm,       TB_ALIGN_16 },
650235633Sdim    { X86::BLENDPDrri,      X86::BLENDPDrmi,    TB_ALIGN_16 },
651235633Sdim    { X86::BLENDPSrri,      X86::BLENDPSrmi,    TB_ALIGN_16 },
652235633Sdim    { X86::BLENDVPDrr0,     X86::BLENDVPDrm0,   TB_ALIGN_16 },
653235633Sdim    { X86::BLENDVPSrr0,     X86::BLENDVPSrm0,   TB_ALIGN_16 },
654226890Sdim    { X86::CMOVA16rr,       X86::CMOVA16rm,     0 },
655226890Sdim    { X86::CMOVA32rr,       X86::CMOVA32rm,     0 },
656226890Sdim    { X86::CMOVA64rr,       X86::CMOVA64rm,     0 },
657226890Sdim    { X86::CMOVAE16rr,      X86::CMOVAE16rm,    0 },
658226890Sdim    { X86::CMOVAE32rr,      X86::CMOVAE32rm,    0 },
659226890Sdim    { X86::CMOVAE64rr,      X86::CMOVAE64rm,    0 },
660226890Sdim    { X86::CMOVB16rr,       X86::CMOVB16rm,     0 },
661226890Sdim    { X86::CMOVB32rr,       X86::CMOVB32rm,     0 },
662226890Sdim    { X86::CMOVB64rr,       X86::CMOVB64rm,     0 },
663226890Sdim    { X86::CMOVBE16rr,      X86::CMOVBE16rm,    0 },
664226890Sdim    { X86::CMOVBE32rr,      X86::CMOVBE32rm,    0 },
665226890Sdim    { X86::CMOVBE64rr,      X86::CMOVBE64rm,    0 },
666226890Sdim    { X86::CMOVE16rr,       X86::CMOVE16rm,     0 },
667226890Sdim    { X86::CMOVE32rr,       X86::CMOVE32rm,     0 },
668226890Sdim    { X86::CMOVE64rr,       X86::CMOVE64rm,     0 },
669226890Sdim    { X86::CMOVG16rr,       X86::CMOVG16rm,     0 },
670226890Sdim    { X86::CMOVG32rr,       X86::CMOVG32rm,     0 },
671226890Sdim    { X86::CMOVG64rr,       X86::CMOVG64rm,     0 },
672226890Sdim    { X86::CMOVGE16rr,      X86::CMOVGE16rm,    0 },
673226890Sdim    { X86::CMOVGE32rr,      X86::CMOVGE32rm,    0 },
674226890Sdim    { X86::CMOVGE64rr,      X86::CMOVGE64rm,    0 },
675226890Sdim    { X86::CMOVL16rr,       X86::CMOVL16rm,     0 },
676226890Sdim    { X86::CMOVL32rr,       X86::CMOVL32rm,     0 },
677226890Sdim    { X86::CMOVL64rr,       X86::CMOVL64rm,     0 },
678226890Sdim    { X86::CMOVLE16rr,      X86::CMOVLE16rm,    0 },
679226890Sdim    { X86::CMOVLE32rr,      X86::CMOVLE32rm,    0 },
680226890Sdim    { X86::CMOVLE64rr,      X86::CMOVLE64rm,    0 },
681226890Sdim    { X86::CMOVNE16rr,      X86::CMOVNE16rm,    0 },
682226890Sdim    { X86::CMOVNE32rr,      X86::CMOVNE32rm,    0 },
683226890Sdim    { X86::CMOVNE64rr,      X86::CMOVNE64rm,    0 },
684226890Sdim    { X86::CMOVNO16rr,      X86::CMOVNO16rm,    0 },
685226890Sdim    { X86::CMOVNO32rr,      X86::CMOVNO32rm,    0 },
686226890Sdim    { X86::CMOVNO64rr,      X86::CMOVNO64rm,    0 },
687226890Sdim    { X86::CMOVNP16rr,      X86::CMOVNP16rm,    0 },
688226890Sdim    { X86::CMOVNP32rr,      X86::CMOVNP32rm,    0 },
689226890Sdim    { X86::CMOVNP64rr,      X86::CMOVNP64rm,    0 },
690226890Sdim    { X86::CMOVNS16rr,      X86::CMOVNS16rm,    0 },
691226890Sdim    { X86::CMOVNS32rr,      X86::CMOVNS32rm,    0 },
692226890Sdim    { X86::CMOVNS64rr,      X86::CMOVNS64rm,    0 },
693226890Sdim    { X86::CMOVO16rr,       X86::CMOVO16rm,     0 },
694226890Sdim    { X86::CMOVO32rr,       X86::CMOVO32rm,     0 },
695226890Sdim    { X86::CMOVO64rr,       X86::CMOVO64rm,     0 },
696226890Sdim    { X86::CMOVP16rr,       X86::CMOVP16rm,     0 },
697226890Sdim    { X86::CMOVP32rr,       X86::CMOVP32rm,     0 },
698226890Sdim    { X86::CMOVP64rr,       X86::CMOVP64rm,     0 },
699226890Sdim    { X86::CMOVS16rr,       X86::CMOVS16rm,     0 },
700226890Sdim    { X86::CMOVS32rr,       X86::CMOVS32rm,     0 },
701226890Sdim    { X86::CMOVS64rr,       X86::CMOVS64rm,     0 },
702226890Sdim    { X86::CMPPDrri,        X86::CMPPDrmi,      TB_ALIGN_16 },
703226890Sdim    { X86::CMPPSrri,        X86::CMPPSrmi,      TB_ALIGN_16 },
704226890Sdim    { X86::CMPSDrr,         X86::CMPSDrm,       0 },
705226890Sdim    { X86::CMPSSrr,         X86::CMPSSrm,       0 },
706226890Sdim    { X86::DIVPDrr,         X86::DIVPDrm,       TB_ALIGN_16 },
707226890Sdim    { X86::DIVPSrr,         X86::DIVPSrm,       TB_ALIGN_16 },
708226890Sdim    { X86::DIVSDrr,         X86::DIVSDrm,       0 },
709226890Sdim    { X86::DIVSSrr,         X86::DIVSSrm,       0 },
710226890Sdim    { X86::FsANDNPDrr,      X86::FsANDNPDrm,    TB_ALIGN_16 },
711226890Sdim    { X86::FsANDNPSrr,      X86::FsANDNPSrm,    TB_ALIGN_16 },
712226890Sdim    { X86::FsANDPDrr,       X86::FsANDPDrm,     TB_ALIGN_16 },
713226890Sdim    { X86::FsANDPSrr,       X86::FsANDPSrm,     TB_ALIGN_16 },
714226890Sdim    { X86::FsORPDrr,        X86::FsORPDrm,      TB_ALIGN_16 },
715226890Sdim    { X86::FsORPSrr,        X86::FsORPSrm,      TB_ALIGN_16 },
716226890Sdim    { X86::FsXORPDrr,       X86::FsXORPDrm,     TB_ALIGN_16 },
717226890Sdim    { X86::FsXORPSrr,       X86::FsXORPSrm,     TB_ALIGN_16 },
718226890Sdim    { X86::HADDPDrr,        X86::HADDPDrm,      TB_ALIGN_16 },
719226890Sdim    { X86::HADDPSrr,        X86::HADDPSrm,      TB_ALIGN_16 },
720226890Sdim    { X86::HSUBPDrr,        X86::HSUBPDrm,      TB_ALIGN_16 },
721226890Sdim    { X86::HSUBPSrr,        X86::HSUBPSrm,      TB_ALIGN_16 },
722226890Sdim    { X86::IMUL16rr,        X86::IMUL16rm,      0 },
723226890Sdim    { X86::IMUL32rr,        X86::IMUL32rm,      0 },
724226890Sdim    { X86::IMUL64rr,        X86::IMUL64rm,      0 },
725226890Sdim    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm,   0 },
726226890Sdim    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm,   0 },
727245431Sdim    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm,      0 },
728245431Sdim    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm,    0 },
729245431Sdim    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm,      0 },
730245431Sdim    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm,    0 },
731245431Sdim    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm,      0 },
732245431Sdim    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm,      0 },
733226890Sdim    { X86::MAXPDrr,         X86::MAXPDrm,       TB_ALIGN_16 },
734226890Sdim    { X86::MAXPSrr,         X86::MAXPSrm,       TB_ALIGN_16 },
735226890Sdim    { X86::MAXSDrr,         X86::MAXSDrm,       0 },
736226890Sdim    { X86::MAXSSrr,         X86::MAXSSrm,       0 },
737226890Sdim    { X86::MINPDrr,         X86::MINPDrm,       TB_ALIGN_16 },
738226890Sdim    { X86::MINPSrr,         X86::MINPSrm,       TB_ALIGN_16 },
739226890Sdim    { X86::MINSDrr,         X86::MINSDrm,       0 },
740226890Sdim    { X86::MINSSrr,         X86::MINSSrm,       0 },
741235633Sdim    { X86::MPSADBWrri,      X86::MPSADBWrmi,    TB_ALIGN_16 },
742226890Sdim    { X86::MULPDrr,         X86::MULPDrm,       TB_ALIGN_16 },
743226890Sdim    { X86::MULPSrr,         X86::MULPSrm,       TB_ALIGN_16 },
744226890Sdim    { X86::MULSDrr,         X86::MULSDrm,       0 },
745226890Sdim    { X86::MULSSrr,         X86::MULSSrm,       0 },
746226890Sdim    { X86::OR16rr,          X86::OR16rm,        0 },
747226890Sdim    { X86::OR32rr,          X86::OR32rm,        0 },
748226890Sdim    { X86::OR64rr,          X86::OR64rm,        0 },
749226890Sdim    { X86::OR8rr,           X86::OR8rm,         0 },
750226890Sdim    { X86::ORPDrr,          X86::ORPDrm,        TB_ALIGN_16 },
751226890Sdim    { X86::ORPSrr,          X86::ORPSrm,        TB_ALIGN_16 },
752226890Sdim    { X86::PACKSSDWrr,      X86::PACKSSDWrm,    TB_ALIGN_16 },
753226890Sdim    { X86::PACKSSWBrr,      X86::PACKSSWBrm,    TB_ALIGN_16 },
754235633Sdim    { X86::PACKUSDWrr,      X86::PACKUSDWrm,    TB_ALIGN_16 },
755226890Sdim    { X86::PACKUSWBrr,      X86::PACKUSWBrm,    TB_ALIGN_16 },
756226890Sdim    { X86::PADDBrr,         X86::PADDBrm,       TB_ALIGN_16 },
757226890Sdim    { X86::PADDDrr,         X86::PADDDrm,       TB_ALIGN_16 },
758226890Sdim    { X86::PADDQrr,         X86::PADDQrm,       TB_ALIGN_16 },
759226890Sdim    { X86::PADDSBrr,        X86::PADDSBrm,      TB_ALIGN_16 },
760226890Sdim    { X86::PADDSWrr,        X86::PADDSWrm,      TB_ALIGN_16 },
761235633Sdim    { X86::PADDUSBrr,       X86::PADDUSBrm,     TB_ALIGN_16 },
762235633Sdim    { X86::PADDUSWrr,       X86::PADDUSWrm,     TB_ALIGN_16 },
763226890Sdim    { X86::PADDWrr,         X86::PADDWrm,       TB_ALIGN_16 },
764235633Sdim    { X86::PALIGNR128rr,    X86::PALIGNR128rm,  TB_ALIGN_16 },
765226890Sdim    { X86::PANDNrr,         X86::PANDNrm,       TB_ALIGN_16 },
766226890Sdim    { X86::PANDrr,          X86::PANDrm,        TB_ALIGN_16 },
767226890Sdim    { X86::PAVGBrr,         X86::PAVGBrm,       TB_ALIGN_16 },
768226890Sdim    { X86::PAVGWrr,         X86::PAVGWrm,       TB_ALIGN_16 },
769235633Sdim    { X86::PBLENDWrri,      X86::PBLENDWrmi,    TB_ALIGN_16 },
770226890Sdim    { X86::PCMPEQBrr,       X86::PCMPEQBrm,     TB_ALIGN_16 },
771226890Sdim    { X86::PCMPEQDrr,       X86::PCMPEQDrm,     TB_ALIGN_16 },
772235633Sdim    { X86::PCMPEQQrr,       X86::PCMPEQQrm,     TB_ALIGN_16 },
773226890Sdim    { X86::PCMPEQWrr,       X86::PCMPEQWrm,     TB_ALIGN_16 },
774226890Sdim    { X86::PCMPGTBrr,       X86::PCMPGTBrm,     TB_ALIGN_16 },
775226890Sdim    { X86::PCMPGTDrr,       X86::PCMPGTDrm,     TB_ALIGN_16 },
776235633Sdim    { X86::PCMPGTQrr,       X86::PCMPGTQrm,     TB_ALIGN_16 },
777226890Sdim    { X86::PCMPGTWrr,       X86::PCMPGTWrm,     TB_ALIGN_16 },
778235633Sdim    { X86::PHADDDrr,        X86::PHADDDrm,      TB_ALIGN_16 },
779235633Sdim    { X86::PHADDWrr,        X86::PHADDWrm,      TB_ALIGN_16 },
780235633Sdim    { X86::PHADDSWrr128,    X86::PHADDSWrm128,  TB_ALIGN_16 },
781235633Sdim    { X86::PHSUBDrr,        X86::PHSUBDrm,      TB_ALIGN_16 },
782235633Sdim    { X86::PHSUBSWrr128,    X86::PHSUBSWrm128,  TB_ALIGN_16 },
783235633Sdim    { X86::PHSUBWrr,        X86::PHSUBWrm,      TB_ALIGN_16 },
784226890Sdim    { X86::PINSRWrri,       X86::PINSRWrmi,     TB_ALIGN_16 },
785235633Sdim    { X86::PMADDUBSWrr128,  X86::PMADDUBSWrm128, TB_ALIGN_16 },
786226890Sdim    { X86::PMADDWDrr,       X86::PMADDWDrm,     TB_ALIGN_16 },
787226890Sdim    { X86::PMAXSWrr,        X86::PMAXSWrm,      TB_ALIGN_16 },
788226890Sdim    { X86::PMAXUBrr,        X86::PMAXUBrm,      TB_ALIGN_16 },
789226890Sdim    { X86::PMINSWrr,        X86::PMINSWrm,      TB_ALIGN_16 },
790226890Sdim    { X86::PMINUBrr,        X86::PMINUBrm,      TB_ALIGN_16 },
791252723Sdim    { X86::PMINSBrr,        X86::PMINSBrm,      TB_ALIGN_16 },
792252723Sdim    { X86::PMINSDrr,        X86::PMINSDrm,      TB_ALIGN_16 },
793252723Sdim    { X86::PMINUDrr,        X86::PMINUDrm,      TB_ALIGN_16 },
794252723Sdim    { X86::PMINUWrr,        X86::PMINUWrm,      TB_ALIGN_16 },
795252723Sdim    { X86::PMAXSBrr,        X86::PMAXSBrm,      TB_ALIGN_16 },
796252723Sdim    { X86::PMAXSDrr,        X86::PMAXSDrm,      TB_ALIGN_16 },
797252723Sdim    { X86::PMAXUDrr,        X86::PMAXUDrm,      TB_ALIGN_16 },
798252723Sdim    { X86::PMAXUWrr,        X86::PMAXUWrm,      TB_ALIGN_16 },
799226890Sdim    { X86::PMULDQrr,        X86::PMULDQrm,      TB_ALIGN_16 },
800235633Sdim    { X86::PMULHRSWrr128,   X86::PMULHRSWrm128, TB_ALIGN_16 },
801226890Sdim    { X86::PMULHUWrr,       X86::PMULHUWrm,     TB_ALIGN_16 },
802226890Sdim    { X86::PMULHWrr,        X86::PMULHWrm,      TB_ALIGN_16 },
803226890Sdim    { X86::PMULLDrr,        X86::PMULLDrm,      TB_ALIGN_16 },
804226890Sdim    { X86::PMULLWrr,        X86::PMULLWrm,      TB_ALIGN_16 },
805226890Sdim    { X86::PMULUDQrr,       X86::PMULUDQrm,     TB_ALIGN_16 },
806226890Sdim    { X86::PORrr,           X86::PORrm,         TB_ALIGN_16 },
807226890Sdim    { X86::PSADBWrr,        X86::PSADBWrm,      TB_ALIGN_16 },
808235633Sdim    { X86::PSHUFBrr,        X86::PSHUFBrm,      TB_ALIGN_16 },
809235633Sdim    { X86::PSIGNBrr,        X86::PSIGNBrm,      TB_ALIGN_16 },
810235633Sdim    { X86::PSIGNWrr,        X86::PSIGNWrm,      TB_ALIGN_16 },
811235633Sdim    { X86::PSIGNDrr,        X86::PSIGNDrm,      TB_ALIGN_16 },
812226890Sdim    { X86::PSLLDrr,         X86::PSLLDrm,       TB_ALIGN_16 },
813226890Sdim    { X86::PSLLQrr,         X86::PSLLQrm,       TB_ALIGN_16 },
814226890Sdim    { X86::PSLLWrr,         X86::PSLLWrm,       TB_ALIGN_16 },
815226890Sdim    { X86::PSRADrr,         X86::PSRADrm,       TB_ALIGN_16 },
816226890Sdim    { X86::PSRAWrr,         X86::PSRAWrm,       TB_ALIGN_16 },
817226890Sdim    { X86::PSRLDrr,         X86::PSRLDrm,       TB_ALIGN_16 },
818226890Sdim    { X86::PSRLQrr,         X86::PSRLQrm,       TB_ALIGN_16 },
819226890Sdim    { X86::PSRLWrr,         X86::PSRLWrm,       TB_ALIGN_16 },
820226890Sdim    { X86::PSUBBrr,         X86::PSUBBrm,       TB_ALIGN_16 },
821226890Sdim    { X86::PSUBDrr,         X86::PSUBDrm,       TB_ALIGN_16 },
822226890Sdim    { X86::PSUBSBrr,        X86::PSUBSBrm,      TB_ALIGN_16 },
823226890Sdim    { X86::PSUBSWrr,        X86::PSUBSWrm,      TB_ALIGN_16 },
824226890Sdim    { X86::PSUBWrr,         X86::PSUBWrm,       TB_ALIGN_16 },
825226890Sdim    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm,   TB_ALIGN_16 },
826226890Sdim    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm,   TB_ALIGN_16 },
827226890Sdim    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm,  TB_ALIGN_16 },
828226890Sdim    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm,   TB_ALIGN_16 },
829226890Sdim    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm,   TB_ALIGN_16 },
830226890Sdim    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm,   TB_ALIGN_16 },
831226890Sdim    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm,  TB_ALIGN_16 },
832226890Sdim    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm,   TB_ALIGN_16 },
833226890Sdim    { X86::PXORrr,          X86::PXORrm,        TB_ALIGN_16 },
834226890Sdim    { X86::SBB32rr,         X86::SBB32rm,       0 },
835226890Sdim    { X86::SBB64rr,         X86::SBB64rm,       0 },
836226890Sdim    { X86::SHUFPDrri,       X86::SHUFPDrmi,     TB_ALIGN_16 },
837226890Sdim    { X86::SHUFPSrri,       X86::SHUFPSrmi,     TB_ALIGN_16 },
838226890Sdim    { X86::SUB16rr,         X86::SUB16rm,       0 },
839226890Sdim    { X86::SUB32rr,         X86::SUB32rm,       0 },
840226890Sdim    { X86::SUB64rr,         X86::SUB64rm,       0 },
841226890Sdim    { X86::SUB8rr,          X86::SUB8rm,        0 },
842226890Sdim    { X86::SUBPDrr,         X86::SUBPDrm,       TB_ALIGN_16 },
843226890Sdim    { X86::SUBPSrr,         X86::SUBPSrm,       TB_ALIGN_16 },
844226890Sdim    { X86::SUBSDrr,         X86::SUBSDrm,       0 },
845226890Sdim    { X86::SUBSSrr,         X86::SUBSSrm,       0 },
846193323Sed    // FIXME: TEST*rr -> swapped operand of TEST*mr.
847226890Sdim    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm,    TB_ALIGN_16 },
848226890Sdim    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm,    TB_ALIGN_16 },
849226890Sdim    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm,    TB_ALIGN_16 },
850226890Sdim    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm,    TB_ALIGN_16 },
851226890Sdim    { X86::XOR16rr,         X86::XOR16rm,       0 },
852226890Sdim    { X86::XOR32rr,         X86::XOR32rm,       0 },
853226890Sdim    { X86::XOR64rr,         X86::XOR64rm,       0 },
854226890Sdim    { X86::XOR8rr,          X86::XOR8rm,        0 },
855226890Sdim    { X86::XORPDrr,         X86::XORPDrm,       TB_ALIGN_16 },
856226890Sdim    { X86::XORPSrr,         X86::XORPSrm,       TB_ALIGN_16 },
857226890Sdim    // AVX 128-bit versions of foldable instructions
858226890Sdim    { X86::VCVTSD2SSrr,       X86::VCVTSD2SSrm,        0 },
859226890Sdim    { X86::Int_VCVTSD2SSrr,   X86::Int_VCVTSD2SSrm,    0 },
860226890Sdim    { X86::VCVTSI2SD64rr,     X86::VCVTSI2SD64rm,      0 },
861226890Sdim    { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm,  0 },
862226890Sdim    { X86::VCVTSI2SDrr,       X86::VCVTSI2SDrm,        0 },
863226890Sdim    { X86::Int_VCVTSI2SDrr,   X86::Int_VCVTSI2SDrm,    0 },
864226890Sdim    { X86::VCVTSI2SS64rr,     X86::VCVTSI2SS64rm,      0 },
865226890Sdim    { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm,  0 },
866226890Sdim    { X86::VCVTSI2SSrr,       X86::VCVTSI2SSrm,        0 },
867226890Sdim    { X86::Int_VCVTSI2SSrr,   X86::Int_VCVTSI2SSrm,    0 },
868226890Sdim    { X86::VCVTSS2SDrr,       X86::VCVTSS2SDrm,        0 },
869226890Sdim    { X86::Int_VCVTSS2SDrr,   X86::Int_VCVTSS2SDrm,    0 },
870252723Sdim    { X86::VCVTTPD2DQrr,      X86::VCVTTPD2DQXrm,      0 },
871252723Sdim    { X86::VCVTTPS2DQrr,      X86::VCVTTPS2DQrm,       0 },
872226890Sdim    { X86::VRSQRTSSr,         X86::VRSQRTSSm,          0 },
873226890Sdim    { X86::VSQRTSDr,          X86::VSQRTSDm,           0 },
874226890Sdim    { X86::VSQRTSSr,          X86::VSQRTSSm,           0 },
875252723Sdim    { X86::VADDPDrr,          X86::VADDPDrm,           0 },
876252723Sdim    { X86::VADDPSrr,          X86::VADDPSrm,           0 },
877226890Sdim    { X86::VADDSDrr,          X86::VADDSDrm,           0 },
878226890Sdim    { X86::VADDSSrr,          X86::VADDSSrm,           0 },
879252723Sdim    { X86::VADDSUBPDrr,       X86::VADDSUBPDrm,        0 },
880252723Sdim    { X86::VADDSUBPSrr,       X86::VADDSUBPSrm,        0 },
881252723Sdim    { X86::VANDNPDrr,         X86::VANDNPDrm,          0 },
882252723Sdim    { X86::VANDNPSrr,         X86::VANDNPSrm,          0 },
883252723Sdim    { X86::VANDPDrr,          X86::VANDPDrm,           0 },
884252723Sdim    { X86::VANDPSrr,          X86::VANDPSrm,           0 },
885252723Sdim    { X86::VBLENDPDrri,       X86::VBLENDPDrmi,        0 },
886252723Sdim    { X86::VBLENDPSrri,       X86::VBLENDPSrmi,        0 },
887252723Sdim    { X86::VBLENDVPDrr,       X86::VBLENDVPDrm,        0 },
888252723Sdim    { X86::VBLENDVPSrr,       X86::VBLENDVPSrm,        0 },
889252723Sdim    { X86::VCMPPDrri,         X86::VCMPPDrmi,          0 },
890252723Sdim    { X86::VCMPPSrri,         X86::VCMPPSrmi,          0 },
891226890Sdim    { X86::VCMPSDrr,          X86::VCMPSDrm,           0 },
892226890Sdim    { X86::VCMPSSrr,          X86::VCMPSSrm,           0 },
893252723Sdim    { X86::VDIVPDrr,          X86::VDIVPDrm,           0 },
894252723Sdim    { X86::VDIVPSrr,          X86::VDIVPSrm,           0 },
895226890Sdim    { X86::VDIVSDrr,          X86::VDIVSDrm,           0 },
896226890Sdim    { X86::VDIVSSrr,          X86::VDIVSSrm,           0 },
897226890Sdim    { X86::VFsANDNPDrr,       X86::VFsANDNPDrm,        TB_ALIGN_16 },
898226890Sdim    { X86::VFsANDNPSrr,       X86::VFsANDNPSrm,        TB_ALIGN_16 },
899226890Sdim    { X86::VFsANDPDrr,        X86::VFsANDPDrm,         TB_ALIGN_16 },
900226890Sdim    { X86::VFsANDPSrr,        X86::VFsANDPSrm,         TB_ALIGN_16 },
901226890Sdim    { X86::VFsORPDrr,         X86::VFsORPDrm,          TB_ALIGN_16 },
902226890Sdim    { X86::VFsORPSrr,         X86::VFsORPSrm,          TB_ALIGN_16 },
903226890Sdim    { X86::VFsXORPDrr,        X86::VFsXORPDrm,         TB_ALIGN_16 },
904226890Sdim    { X86::VFsXORPSrr,        X86::VFsXORPSrm,         TB_ALIGN_16 },
905252723Sdim    { X86::VHADDPDrr,         X86::VHADDPDrm,          0 },
906252723Sdim    { X86::VHADDPSrr,         X86::VHADDPSrm,          0 },
907252723Sdim    { X86::VHSUBPDrr,         X86::VHSUBPDrm,          0 },
908252723Sdim    { X86::VHSUBPSrr,         X86::VHSUBPSrm,          0 },
909226890Sdim    { X86::Int_VCMPSDrr,      X86::Int_VCMPSDrm,       0 },
910226890Sdim    { X86::Int_VCMPSSrr,      X86::Int_VCMPSSrm,       0 },
911252723Sdim    { X86::VMAXPDrr,          X86::VMAXPDrm,           0 },
912252723Sdim    { X86::VMAXPSrr,          X86::VMAXPSrm,           0 },
913226890Sdim    { X86::VMAXSDrr,          X86::VMAXSDrm,           0 },
914226890Sdim    { X86::VMAXSSrr,          X86::VMAXSSrm,           0 },
915252723Sdim    { X86::VMINPDrr,          X86::VMINPDrm,           0 },
916252723Sdim    { X86::VMINPSrr,          X86::VMINPSrm,           0 },
917226890Sdim    { X86::VMINSDrr,          X86::VMINSDrm,           0 },
918226890Sdim    { X86::VMINSSrr,          X86::VMINSSrm,           0 },
919252723Sdim    { X86::VMPSADBWrri,       X86::VMPSADBWrmi,        0 },
920252723Sdim    { X86::VMULPDrr,          X86::VMULPDrm,           0 },
921252723Sdim    { X86::VMULPSrr,          X86::VMULPSrm,           0 },
922226890Sdim    { X86::VMULSDrr,          X86::VMULSDrm,           0 },
923226890Sdim    { X86::VMULSSrr,          X86::VMULSSrm,           0 },
924252723Sdim    { X86::VORPDrr,           X86::VORPDrm,            0 },
925252723Sdim    { X86::VORPSrr,           X86::VORPSrm,            0 },
926252723Sdim    { X86::VPACKSSDWrr,       X86::VPACKSSDWrm,        0 },
927252723Sdim    { X86::VPACKSSWBrr,       X86::VPACKSSWBrm,        0 },
928252723Sdim    { X86::VPACKUSDWrr,       X86::VPACKUSDWrm,        0 },
929252723Sdim    { X86::VPACKUSWBrr,       X86::VPACKUSWBrm,        0 },
930252723Sdim    { X86::VPADDBrr,          X86::VPADDBrm,           0 },
931252723Sdim    { X86::VPADDDrr,          X86::VPADDDrm,           0 },
932252723Sdim    { X86::VPADDQrr,          X86::VPADDQrm,           0 },
933252723Sdim    { X86::VPADDSBrr,         X86::VPADDSBrm,          0 },
934252723Sdim    { X86::VPADDSWrr,         X86::VPADDSWrm,          0 },
935252723Sdim    { X86::VPADDUSBrr,        X86::VPADDUSBrm,         0 },
936252723Sdim    { X86::VPADDUSWrr,        X86::VPADDUSWrm,         0 },
937252723Sdim    { X86::VPADDWrr,          X86::VPADDWrm,           0 },
938252723Sdim    { X86::VPALIGNR128rr,     X86::VPALIGNR128rm,      0 },
939252723Sdim    { X86::VPANDNrr,          X86::VPANDNrm,           0 },
940252723Sdim    { X86::VPANDrr,           X86::VPANDrm,            0 },
941252723Sdim    { X86::VPAVGBrr,          X86::VPAVGBrm,           0 },
942252723Sdim    { X86::VPAVGWrr,          X86::VPAVGWrm,           0 },
943252723Sdim    { X86::VPBLENDWrri,       X86::VPBLENDWrmi,        0 },
944252723Sdim    { X86::VPCMPEQBrr,        X86::VPCMPEQBrm,         0 },
945252723Sdim    { X86::VPCMPEQDrr,        X86::VPCMPEQDrm,         0 },
946252723Sdim    { X86::VPCMPEQQrr,        X86::VPCMPEQQrm,         0 },
947252723Sdim    { X86::VPCMPEQWrr,        X86::VPCMPEQWrm,         0 },
948252723Sdim    { X86::VPCMPGTBrr,        X86::VPCMPGTBrm,         0 },
949252723Sdim    { X86::VPCMPGTDrr,        X86::VPCMPGTDrm,         0 },
950252723Sdim    { X86::VPCMPGTQrr,        X86::VPCMPGTQrm,         0 },
951252723Sdim    { X86::VPCMPGTWrr,        X86::VPCMPGTWrm,         0 },
952252723Sdim    { X86::VPHADDDrr,         X86::VPHADDDrm,          0 },
953252723Sdim    { X86::VPHADDSWrr128,     X86::VPHADDSWrm128,      0 },
954252723Sdim    { X86::VPHADDWrr,         X86::VPHADDWrm,          0 },
955252723Sdim    { X86::VPHSUBDrr,         X86::VPHSUBDrm,          0 },
956252723Sdim    { X86::VPHSUBSWrr128,     X86::VPHSUBSWrm128,      0 },
957252723Sdim    { X86::VPHSUBWrr,         X86::VPHSUBWrm,          0 },
958252723Sdim    { X86::VPERMILPDrr,       X86::VPERMILPDrm,        0 },
959252723Sdim    { X86::VPERMILPSrr,       X86::VPERMILPSrm,        0 },
960252723Sdim    { X86::VPINSRWrri,        X86::VPINSRWrmi,         0 },
961252723Sdim    { X86::VPMADDUBSWrr128,   X86::VPMADDUBSWrm128,    0 },
962252723Sdim    { X86::VPMADDWDrr,        X86::VPMADDWDrm,         0 },
963252723Sdim    { X86::VPMAXSWrr,         X86::VPMAXSWrm,          0 },
964252723Sdim    { X86::VPMAXUBrr,         X86::VPMAXUBrm,          0 },
965252723Sdim    { X86::VPMINSWrr,         X86::VPMINSWrm,          0 },
966252723Sdim    { X86::VPMINUBrr,         X86::VPMINUBrm,          0 },
967252723Sdim    { X86::VPMINSBrr,         X86::VPMINSBrm,          0 },
968252723Sdim    { X86::VPMINSDrr,         X86::VPMINSDrm,          0 },
969252723Sdim    { X86::VPMINUDrr,         X86::VPMINUDrm,          0 },
970252723Sdim    { X86::VPMINUWrr,         X86::VPMINUWrm,          0 },
971252723Sdim    { X86::VPMAXSBrr,         X86::VPMAXSBrm,          0 },
972252723Sdim    { X86::VPMAXSDrr,         X86::VPMAXSDrm,          0 },
973252723Sdim    { X86::VPMAXUDrr,         X86::VPMAXUDrm,          0 },
974252723Sdim    { X86::VPMAXUWrr,         X86::VPMAXUWrm,          0 },
975252723Sdim    { X86::VPMULDQrr,         X86::VPMULDQrm,          0 },
976252723Sdim    { X86::VPMULHRSWrr128,    X86::VPMULHRSWrm128,     0 },
977252723Sdim    { X86::VPMULHUWrr,        X86::VPMULHUWrm,         0 },
978252723Sdim    { X86::VPMULHWrr,         X86::VPMULHWrm,          0 },
979252723Sdim    { X86::VPMULLDrr,         X86::VPMULLDrm,          0 },
980252723Sdim    { X86::VPMULLWrr,         X86::VPMULLWrm,          0 },
981252723Sdim    { X86::VPMULUDQrr,        X86::VPMULUDQrm,         0 },
982252723Sdim    { X86::VPORrr,            X86::VPORrm,             0 },
983252723Sdim    { X86::VPSADBWrr,         X86::VPSADBWrm,          0 },
984252723Sdim    { X86::VPSHUFBrr,         X86::VPSHUFBrm,          0 },
985252723Sdim    { X86::VPSIGNBrr,         X86::VPSIGNBrm,          0 },
986252723Sdim    { X86::VPSIGNWrr,         X86::VPSIGNWrm,          0 },
987252723Sdim    { X86::VPSIGNDrr,         X86::VPSIGNDrm,          0 },
988252723Sdim    { X86::VPSLLDrr,          X86::VPSLLDrm,           0 },
989252723Sdim    { X86::VPSLLQrr,          X86::VPSLLQrm,           0 },
990252723Sdim    { X86::VPSLLWrr,          X86::VPSLLWrm,           0 },
991252723Sdim    { X86::VPSRADrr,          X86::VPSRADrm,           0 },
992252723Sdim    { X86::VPSRAWrr,          X86::VPSRAWrm,           0 },
993252723Sdim    { X86::VPSRLDrr,          X86::VPSRLDrm,           0 },
994252723Sdim    { X86::VPSRLQrr,          X86::VPSRLQrm,           0 },
995252723Sdim    { X86::VPSRLWrr,          X86::VPSRLWrm,           0 },
996252723Sdim    { X86::VPSUBBrr,          X86::VPSUBBrm,           0 },
997252723Sdim    { X86::VPSUBDrr,          X86::VPSUBDrm,           0 },
998252723Sdim    { X86::VPSUBSBrr,         X86::VPSUBSBrm,          0 },
999252723Sdim    { X86::VPSUBSWrr,         X86::VPSUBSWrm,          0 },
1000252723Sdim    { X86::VPSUBWrr,          X86::VPSUBWrm,           0 },
1001252723Sdim    { X86::VPUNPCKHBWrr,      X86::VPUNPCKHBWrm,       0 },
1002252723Sdim    { X86::VPUNPCKHDQrr,      X86::VPUNPCKHDQrm,       0 },
1003252723Sdim    { X86::VPUNPCKHQDQrr,     X86::VPUNPCKHQDQrm,      0 },
1004252723Sdim    { X86::VPUNPCKHWDrr,      X86::VPUNPCKHWDrm,       0 },
1005252723Sdim    { X86::VPUNPCKLBWrr,      X86::VPUNPCKLBWrm,       0 },
1006252723Sdim    { X86::VPUNPCKLDQrr,      X86::VPUNPCKLDQrm,       0 },
1007252723Sdim    { X86::VPUNPCKLQDQrr,     X86::VPUNPCKLQDQrm,      0 },
1008252723Sdim    { X86::VPUNPCKLWDrr,      X86::VPUNPCKLWDrm,       0 },
1009252723Sdim    { X86::VPXORrr,           X86::VPXORrm,            0 },
1010252723Sdim    { X86::VSHUFPDrri,        X86::VSHUFPDrmi,         0 },
1011252723Sdim    { X86::VSHUFPSrri,        X86::VSHUFPSrmi,         0 },
1012252723Sdim    { X86::VSUBPDrr,          X86::VSUBPDrm,           0 },
1013252723Sdim    { X86::VSUBPSrr,          X86::VSUBPSrm,           0 },
1014226890Sdim    { X86::VSUBSDrr,          X86::VSUBSDrm,           0 },
1015226890Sdim    { X86::VSUBSSrr,          X86::VSUBSSrm,           0 },
1016252723Sdim    { X86::VUNPCKHPDrr,       X86::VUNPCKHPDrm,        0 },
1017252723Sdim    { X86::VUNPCKHPSrr,       X86::VUNPCKHPSrm,        0 },
1018252723Sdim    { X86::VUNPCKLPDrr,       X86::VUNPCKLPDrm,        0 },
1019252723Sdim    { X86::VUNPCKLPSrr,       X86::VUNPCKLPSrm,        0 },
1020252723Sdim    { X86::VXORPDrr,          X86::VXORPDrm,           0 },
1021252723Sdim    { X86::VXORPSrr,          X86::VXORPSrm,           0 },
1022235633Sdim    // AVX 256-bit foldable instructions
1023252723Sdim    { X86::VADDPDYrr,         X86::VADDPDYrm,          0 },
1024252723Sdim    { X86::VADDPSYrr,         X86::VADDPSYrm,          0 },
1025252723Sdim    { X86::VADDSUBPDYrr,      X86::VADDSUBPDYrm,       0 },
1026252723Sdim    { X86::VADDSUBPSYrr,      X86::VADDSUBPSYrm,       0 },
1027252723Sdim    { X86::VANDNPDYrr,        X86::VANDNPDYrm,         0 },
1028252723Sdim    { X86::VANDNPSYrr,        X86::VANDNPSYrm,         0 },
1029252723Sdim    { X86::VANDPDYrr,         X86::VANDPDYrm,          0 },
1030252723Sdim    { X86::VANDPSYrr,         X86::VANDPSYrm,          0 },
1031252723Sdim    { X86::VBLENDPDYrri,      X86::VBLENDPDYrmi,       0 },
1032252723Sdim    { X86::VBLENDPSYrri,      X86::VBLENDPSYrmi,       0 },
1033252723Sdim    { X86::VBLENDVPDYrr,      X86::VBLENDVPDYrm,       0 },
1034252723Sdim    { X86::VBLENDVPSYrr,      X86::VBLENDVPSYrm,       0 },
1035252723Sdim    { X86::VCMPPDYrri,        X86::VCMPPDYrmi,         0 },
1036252723Sdim    { X86::VCMPPSYrri,        X86::VCMPPSYrmi,         0 },
1037252723Sdim    { X86::VDIVPDYrr,         X86::VDIVPDYrm,          0 },
1038252723Sdim    { X86::VDIVPSYrr,         X86::VDIVPSYrm,          0 },
1039252723Sdim    { X86::VHADDPDYrr,        X86::VHADDPDYrm,         0 },
1040252723Sdim    { X86::VHADDPSYrr,        X86::VHADDPSYrm,         0 },
1041252723Sdim    { X86::VHSUBPDYrr,        X86::VHSUBPDYrm,         0 },
1042252723Sdim    { X86::VHSUBPSYrr,        X86::VHSUBPSYrm,         0 },
1043252723Sdim    { X86::VINSERTF128rr,     X86::VINSERTF128rm,      0 },
1044252723Sdim    { X86::VMAXPDYrr,         X86::VMAXPDYrm,          0 },
1045252723Sdim    { X86::VMAXPSYrr,         X86::VMAXPSYrm,          0 },
1046252723Sdim    { X86::VMINPDYrr,         X86::VMINPDYrm,          0 },
1047252723Sdim    { X86::VMINPSYrr,         X86::VMINPSYrm,          0 },
1048252723Sdim    { X86::VMULPDYrr,         X86::VMULPDYrm,          0 },
1049252723Sdim    { X86::VMULPSYrr,         X86::VMULPSYrm,          0 },
1050252723Sdim    { X86::VORPDYrr,          X86::VORPDYrm,           0 },
1051252723Sdim    { X86::VORPSYrr,          X86::VORPSYrm,           0 },
1052252723Sdim    { X86::VPERM2F128rr,      X86::VPERM2F128rm,       0 },
1053252723Sdim    { X86::VPERMILPDYrr,      X86::VPERMILPDYrm,       0 },
1054252723Sdim    { X86::VPERMILPSYrr,      X86::VPERMILPSYrm,       0 },
1055252723Sdim    { X86::VSHUFPDYrri,       X86::VSHUFPDYrmi,        0 },
1056252723Sdim    { X86::VSHUFPSYrri,       X86::VSHUFPSYrmi,        0 },
1057252723Sdim    { X86::VSUBPDYrr,         X86::VSUBPDYrm,          0 },
1058252723Sdim    { X86::VSUBPSYrr,         X86::VSUBPSYrm,          0 },
1059252723Sdim    { X86::VUNPCKHPDYrr,      X86::VUNPCKHPDYrm,       0 },
1060252723Sdim    { X86::VUNPCKHPSYrr,      X86::VUNPCKHPSYrm,       0 },
1061252723Sdim    { X86::VUNPCKLPDYrr,      X86::VUNPCKLPDYrm,       0 },
1062252723Sdim    { X86::VUNPCKLPSYrr,      X86::VUNPCKLPSYrm,       0 },
1063252723Sdim    { X86::VXORPDYrr,         X86::VXORPDYrm,          0 },
1064252723Sdim    { X86::VXORPSYrr,         X86::VXORPSYrm,          0 },
1065235633Sdim    // AVX2 foldable instructions
1066252723Sdim    { X86::VINSERTI128rr,     X86::VINSERTI128rm,      0 },
1067252723Sdim    { X86::VPACKSSDWYrr,      X86::VPACKSSDWYrm,       0 },
1068252723Sdim    { X86::VPACKSSWBYrr,      X86::VPACKSSWBYrm,       0 },
1069252723Sdim    { X86::VPACKUSDWYrr,      X86::VPACKUSDWYrm,       0 },
1070252723Sdim    { X86::VPACKUSWBYrr,      X86::VPACKUSWBYrm,       0 },
1071252723Sdim    { X86::VPADDBYrr,         X86::VPADDBYrm,          0 },
1072252723Sdim    { X86::VPADDDYrr,         X86::VPADDDYrm,          0 },
1073252723Sdim    { X86::VPADDQYrr,         X86::VPADDQYrm,          0 },
1074252723Sdim    { X86::VPADDSBYrr,        X86::VPADDSBYrm,         0 },
1075252723Sdim    { X86::VPADDSWYrr,        X86::VPADDSWYrm,         0 },
1076252723Sdim    { X86::VPADDUSBYrr,       X86::VPADDUSBYrm,        0 },
1077252723Sdim    { X86::VPADDUSWYrr,       X86::VPADDUSWYrm,        0 },
1078252723Sdim    { X86::VPADDWYrr,         X86::VPADDWYrm,          0 },
1079252723Sdim    { X86::VPALIGNR256rr,     X86::VPALIGNR256rm,      0 },
1080252723Sdim    { X86::VPANDNYrr,         X86::VPANDNYrm,          0 },
1081252723Sdim    { X86::VPANDYrr,          X86::VPANDYrm,           0 },
1082252723Sdim    { X86::VPAVGBYrr,         X86::VPAVGBYrm,          0 },
1083252723Sdim    { X86::VPAVGWYrr,         X86::VPAVGWYrm,          0 },
1084252723Sdim    { X86::VPBLENDDrri,       X86::VPBLENDDrmi,        0 },
1085252723Sdim    { X86::VPBLENDDYrri,      X86::VPBLENDDYrmi,       0 },
1086252723Sdim    { X86::VPBLENDWYrri,      X86::VPBLENDWYrmi,       0 },
1087252723Sdim    { X86::VPCMPEQBYrr,       X86::VPCMPEQBYrm,        0 },
1088252723Sdim    { X86::VPCMPEQDYrr,       X86::VPCMPEQDYrm,        0 },
1089252723Sdim    { X86::VPCMPEQQYrr,       X86::VPCMPEQQYrm,        0 },
1090252723Sdim    { X86::VPCMPEQWYrr,       X86::VPCMPEQWYrm,        0 },
1091252723Sdim    { X86::VPCMPGTBYrr,       X86::VPCMPGTBYrm,        0 },
1092252723Sdim    { X86::VPCMPGTDYrr,       X86::VPCMPGTDYrm,        0 },
1093252723Sdim    { X86::VPCMPGTQYrr,       X86::VPCMPGTQYrm,        0 },
1094252723Sdim    { X86::VPCMPGTWYrr,       X86::VPCMPGTWYrm,        0 },
1095252723Sdim    { X86::VPERM2I128rr,      X86::VPERM2I128rm,       0 },
1096252723Sdim    { X86::VPERMDYrr,         X86::VPERMDYrm,          0 },
1097252723Sdim    { X86::VPERMPDYri,        X86::VPERMPDYmi,         0 },
1098252723Sdim    { X86::VPERMPSYrr,        X86::VPERMPSYrm,         0 },
1099252723Sdim    { X86::VPERMQYri,         X86::VPERMQYmi,          0 },
1100252723Sdim    { X86::VPHADDDYrr,        X86::VPHADDDYrm,         0 },
1101252723Sdim    { X86::VPHADDSWrr256,     X86::VPHADDSWrm256,      0 },
1102252723Sdim    { X86::VPHADDWYrr,        X86::VPHADDWYrm,         0 },
1103252723Sdim    { X86::VPHSUBDYrr,        X86::VPHSUBDYrm,         0 },
1104252723Sdim    { X86::VPHSUBSWrr256,     X86::VPHSUBSWrm256,      0 },
1105252723Sdim    { X86::VPHSUBWYrr,        X86::VPHSUBWYrm,         0 },
1106252723Sdim    { X86::VPMADDUBSWrr256,   X86::VPMADDUBSWrm256,    0 },
1107252723Sdim    { X86::VPMADDWDYrr,       X86::VPMADDWDYrm,        0 },
1108252723Sdim    { X86::VPMAXSWYrr,        X86::VPMAXSWYrm,         0 },
1109252723Sdim    { X86::VPMAXUBYrr,        X86::VPMAXUBYrm,         0 },
1110252723Sdim    { X86::VPMINSWYrr,        X86::VPMINSWYrm,         0 },
1111252723Sdim    { X86::VPMINUBYrr,        X86::VPMINUBYrm,         0 },
1112252723Sdim    { X86::VPMINSBYrr,        X86::VPMINSBYrm,         0 },
1113252723Sdim    { X86::VPMINSDYrr,        X86::VPMINSDYrm,         0 },
1114252723Sdim    { X86::VPMINUDYrr,        X86::VPMINUDYrm,         0 },
1115252723Sdim    { X86::VPMINUWYrr,        X86::VPMINUWYrm,         0 },
1116252723Sdim    { X86::VPMAXSBYrr,        X86::VPMAXSBYrm,         0 },
1117252723Sdim    { X86::VPMAXSDYrr,        X86::VPMAXSDYrm,         0 },
1118252723Sdim    { X86::VPMAXUDYrr,        X86::VPMAXUDYrm,         0 },
1119252723Sdim    { X86::VPMAXUWYrr,        X86::VPMAXUWYrm,         0 },
1120252723Sdim    { X86::VMPSADBWYrri,      X86::VMPSADBWYrmi,       0 },
1121252723Sdim    { X86::VPMULDQYrr,        X86::VPMULDQYrm,         0 },
1122252723Sdim    { X86::VPMULHRSWrr256,    X86::VPMULHRSWrm256,     0 },
1123252723Sdim    { X86::VPMULHUWYrr,       X86::VPMULHUWYrm,        0 },
1124252723Sdim    { X86::VPMULHWYrr,        X86::VPMULHWYrm,         0 },
1125252723Sdim    { X86::VPMULLDYrr,        X86::VPMULLDYrm,         0 },
1126252723Sdim    { X86::VPMULLWYrr,        X86::VPMULLWYrm,         0 },
1127252723Sdim    { X86::VPMULUDQYrr,       X86::VPMULUDQYrm,        0 },
1128252723Sdim    { X86::VPORYrr,           X86::VPORYrm,            0 },
1129252723Sdim    { X86::VPSADBWYrr,        X86::VPSADBWYrm,         0 },
1130252723Sdim    { X86::VPSHUFBYrr,        X86::VPSHUFBYrm,         0 },
1131252723Sdim    { X86::VPSIGNBYrr,        X86::VPSIGNBYrm,         0 },
1132252723Sdim    { X86::VPSIGNWYrr,        X86::VPSIGNWYrm,         0 },
1133252723Sdim    { X86::VPSIGNDYrr,        X86::VPSIGNDYrm,         0 },
1134252723Sdim    { X86::VPSLLDYrr,         X86::VPSLLDYrm,          0 },
1135252723Sdim    { X86::VPSLLQYrr,         X86::VPSLLQYrm,          0 },
1136252723Sdim    { X86::VPSLLWYrr,         X86::VPSLLWYrm,          0 },
1137252723Sdim    { X86::VPSLLVDrr,         X86::VPSLLVDrm,          0 },
1138252723Sdim    { X86::VPSLLVDYrr,        X86::VPSLLVDYrm,         0 },
1139252723Sdim    { X86::VPSLLVQrr,         X86::VPSLLVQrm,          0 },
1140252723Sdim    { X86::VPSLLVQYrr,        X86::VPSLLVQYrm,         0 },
1141252723Sdim    { X86::VPSRADYrr,         X86::VPSRADYrm,          0 },
1142252723Sdim    { X86::VPSRAWYrr,         X86::VPSRAWYrm,          0 },
1143252723Sdim    { X86::VPSRAVDrr,         X86::VPSRAVDrm,          0 },
1144252723Sdim    { X86::VPSRAVDYrr,        X86::VPSRAVDYrm,         0 },
1145252723Sdim    { X86::VPSRLDYrr,         X86::VPSRLDYrm,          0 },
1146252723Sdim    { X86::VPSRLQYrr,         X86::VPSRLQYrm,          0 },
1147252723Sdim    { X86::VPSRLWYrr,         X86::VPSRLWYrm,          0 },
1148252723Sdim    { X86::VPSRLVDrr,         X86::VPSRLVDrm,          0 },
1149252723Sdim    { X86::VPSRLVDYrr,        X86::VPSRLVDYrm,         0 },
1150252723Sdim    { X86::VPSRLVQrr,         X86::VPSRLVQrm,          0 },
1151252723Sdim    { X86::VPSRLVQYrr,        X86::VPSRLVQYrm,         0 },
1152252723Sdim    { X86::VPSUBBYrr,         X86::VPSUBBYrm,          0 },
1153252723Sdim    { X86::VPSUBDYrr,         X86::VPSUBDYrm,          0 },
1154252723Sdim    { X86::VPSUBSBYrr,        X86::VPSUBSBYrm,         0 },
1155252723Sdim    { X86::VPSUBSWYrr,        X86::VPSUBSWYrm,         0 },
1156252723Sdim    { X86::VPSUBWYrr,         X86::VPSUBWYrm,          0 },
1157252723Sdim    { X86::VPUNPCKHBWYrr,     X86::VPUNPCKHBWYrm,      0 },
1158252723Sdim    { X86::VPUNPCKHDQYrr,     X86::VPUNPCKHDQYrm,      0 },
1159252723Sdim    { X86::VPUNPCKHQDQYrr,    X86::VPUNPCKHQDQYrm,     0 },
1160252723Sdim    { X86::VPUNPCKHWDYrr,     X86::VPUNPCKHWDYrm,      0 },
1161252723Sdim    { X86::VPUNPCKLBWYrr,     X86::VPUNPCKLBWYrm,      0 },
1162252723Sdim    { X86::VPUNPCKLDQYrr,     X86::VPUNPCKLDQYrm,      0 },
1163252723Sdim    { X86::VPUNPCKLQDQYrr,    X86::VPUNPCKLQDQYrm,     0 },
1164252723Sdim    { X86::VPUNPCKLWDYrr,     X86::VPUNPCKLWDYrm,      0 },
1165252723Sdim    { X86::VPXORYrr,          X86::VPXORYrm,           0 },
1166226890Sdim    // FIXME: add remaining AVX 256-bit foldable instructions
1167245431Sdim
1168245431Sdim    // FMA4 foldable patterns
1169245431Sdim    { X86::VFMADDSS4rr,       X86::VFMADDSS4mr,        0           },
1170245431Sdim    { X86::VFMADDSD4rr,       X86::VFMADDSD4mr,        0           },
1171245431Sdim    { X86::VFMADDPS4rr,       X86::VFMADDPS4mr,        TB_ALIGN_16 },
1172245431Sdim    { X86::VFMADDPD4rr,       X86::VFMADDPD4mr,        TB_ALIGN_16 },
1173245431Sdim    { X86::VFMADDPS4rrY,      X86::VFMADDPS4mrY,       TB_ALIGN_32 },
1174245431Sdim    { X86::VFMADDPD4rrY,      X86::VFMADDPD4mrY,       TB_ALIGN_32 },
1175245431Sdim    { X86::VFNMADDSS4rr,      X86::VFNMADDSS4mr,       0           },
1176245431Sdim    { X86::VFNMADDSD4rr,      X86::VFNMADDSD4mr,       0           },
1177245431Sdim    { X86::VFNMADDPS4rr,      X86::VFNMADDPS4mr,       TB_ALIGN_16 },
1178245431Sdim    { X86::VFNMADDPD4rr,      X86::VFNMADDPD4mr,       TB_ALIGN_16 },
1179245431Sdim    { X86::VFNMADDPS4rrY,     X86::VFNMADDPS4mrY,      TB_ALIGN_32 },
1180245431Sdim    { X86::VFNMADDPD4rrY,     X86::VFNMADDPD4mrY,      TB_ALIGN_32 },
1181245431Sdim    { X86::VFMSUBSS4rr,       X86::VFMSUBSS4mr,        0           },
1182245431Sdim    { X86::VFMSUBSD4rr,       X86::VFMSUBSD4mr,        0           },
1183245431Sdim    { X86::VFMSUBPS4rr,       X86::VFMSUBPS4mr,        TB_ALIGN_16 },
1184245431Sdim    { X86::VFMSUBPD4rr,       X86::VFMSUBPD4mr,        TB_ALIGN_16 },
1185245431Sdim    { X86::VFMSUBPS4rrY,      X86::VFMSUBPS4mrY,       TB_ALIGN_32 },
1186245431Sdim    { X86::VFMSUBPD4rrY,      X86::VFMSUBPD4mrY,       TB_ALIGN_32 },
1187245431Sdim    { X86::VFNMSUBSS4rr,      X86::VFNMSUBSS4mr,       0           },
1188245431Sdim    { X86::VFNMSUBSD4rr,      X86::VFNMSUBSD4mr,       0           },
1189245431Sdim    { X86::VFNMSUBPS4rr,      X86::VFNMSUBPS4mr,       TB_ALIGN_16 },
1190245431Sdim    { X86::VFNMSUBPD4rr,      X86::VFNMSUBPD4mr,       TB_ALIGN_16 },
1191245431Sdim    { X86::VFNMSUBPS4rrY,     X86::VFNMSUBPS4mrY,      TB_ALIGN_32 },
1192245431Sdim    { X86::VFNMSUBPD4rrY,     X86::VFNMSUBPD4mrY,      TB_ALIGN_32 },
1193245431Sdim    { X86::VFMADDSUBPS4rr,    X86::VFMADDSUBPS4mr,     TB_ALIGN_16 },
1194245431Sdim    { X86::VFMADDSUBPD4rr,    X86::VFMADDSUBPD4mr,     TB_ALIGN_16 },
1195245431Sdim    { X86::VFMADDSUBPS4rrY,   X86::VFMADDSUBPS4mrY,    TB_ALIGN_32 },
1196245431Sdim    { X86::VFMADDSUBPD4rrY,   X86::VFMADDSUBPD4mrY,    TB_ALIGN_32 },
1197245431Sdim    { X86::VFMSUBADDPS4rr,    X86::VFMSUBADDPS4mr,     TB_ALIGN_16 },
1198245431Sdim    { X86::VFMSUBADDPD4rr,    X86::VFMSUBADDPD4mr,     TB_ALIGN_16 },
1199245431Sdim    { X86::VFMSUBADDPS4rrY,   X86::VFMSUBADDPS4mrY,    TB_ALIGN_32 },
1200245431Sdim    { X86::VFMSUBADDPD4rrY,   X86::VFMSUBADDPD4mrY,    TB_ALIGN_32 },
1201245431Sdim
1202245431Sdim    // BMI/BMI2 foldable instructions
1203252723Sdim    { X86::ANDN32rr,          X86::ANDN32rm,            0 },
1204252723Sdim    { X86::ANDN64rr,          X86::ANDN64rm,            0 },
1205245431Sdim    { X86::MULX32rr,          X86::MULX32rm,            0 },
1206245431Sdim    { X86::MULX64rr,          X86::MULX64rm,            0 },
1207252723Sdim    { X86::PDEP32rr,          X86::PDEP32rm,            0 },
1208252723Sdim    { X86::PDEP64rr,          X86::PDEP64rm,            0 },
1209252723Sdim    { X86::PEXT32rr,          X86::PEXT32rm,            0 },
1210252723Sdim    { X86::PEXT64rr,          X86::PEXT64rm,            0 },
1211263509Sdim
1212263509Sdim    // AVX-512 foldable instructions
1213263509Sdim    { X86::VPADDDZrr,         X86::VPADDDZrm,           0 },
1214263509Sdim    { X86::VPADDQZrr,         X86::VPADDQZrm,           0 },
1215263509Sdim    { X86::VADDPSZrr,         X86::VADDPSZrm,           0 },
1216263509Sdim    { X86::VADDPDZrr,         X86::VADDPDZrm,           0 },
1217263509Sdim    { X86::VSUBPSZrr,         X86::VSUBPSZrm,           0 },
1218263509Sdim    { X86::VSUBPDZrr,         X86::VSUBPDZrm,           0 },
1219263509Sdim    { X86::VMULPSZrr,         X86::VMULPSZrm,           0 },
1220263509Sdim    { X86::VMULPDZrr,         X86::VMULPDZrm,           0 },
1221263509Sdim    { X86::VDIVPSZrr,         X86::VDIVPSZrm,           0 },
1222263509Sdim    { X86::VDIVPDZrr,         X86::VDIVPDZrm,           0 },
1223263509Sdim    { X86::VMINPSZrr,         X86::VMINPSZrm,           0 },
1224263509Sdim    { X86::VMINPDZrr,         X86::VMINPDZrm,           0 },
1225263509Sdim    { X86::VMAXPSZrr,         X86::VMAXPSZrm,           0 },
1226263509Sdim    { X86::VMAXPDZrr,         X86::VMAXPDZrm,           0 },
1227263509Sdim    { X86::VPERMPDZri,        X86::VPERMPDZmi,          0 },
1228263509Sdim    { X86::VPERMPSZrr,        X86::VPERMPSZrm,          0 },
1229263509Sdim    { X86::VPSLLVDZrr,        X86::VPSLLVDZrm,          0 },
1230263509Sdim    { X86::VPSLLVQZrr,        X86::VPSLLVQZrm,          0 },
1231263509Sdim    { X86::VPSRAVDZrr,        X86::VPSRAVDZrm,          0 },
1232263509Sdim    { X86::VPSRLVDZrr,        X86::VPSRLVDZrm,          0 },
1233263509Sdim    { X86::VPSRLVQZrr,        X86::VPSRLVQZrm,          0 },
1234263509Sdim    { X86::VSHUFPDZrri,       X86::VSHUFPDZrmi,         0 },
1235263509Sdim    { X86::VSHUFPSZrri,       X86::VSHUFPSZrmi,         0 },
1236263509Sdim    { X86::VALIGNQrri,        X86::VALIGNQrmi,          0 },
1237263509Sdim    { X86::VALIGNDrri,        X86::VALIGNDrmi,          0 },
1238263509Sdim
1239263509Sdim    // AES foldable instructions
1240263509Sdim    { X86::AESDECLASTrr,      X86::AESDECLASTrm,        TB_ALIGN_16 },
1241263509Sdim    { X86::AESDECrr,          X86::AESDECrm,            TB_ALIGN_16 },
1242263509Sdim    { X86::AESENCLASTrr,      X86::AESENCLASTrm,        TB_ALIGN_16 },
1243263509Sdim    { X86::AESENCrr,          X86::AESENCrm,            TB_ALIGN_16 },
1244263509Sdim    { X86::VAESDECLASTrr,     X86::VAESDECLASTrm,       TB_ALIGN_16 },
1245263509Sdim    { X86::VAESDECrr,         X86::VAESDECrm,           TB_ALIGN_16 },
1246263509Sdim    { X86::VAESENCLASTrr,     X86::VAESENCLASTrm,       TB_ALIGN_16 },
1247263509Sdim    { X86::VAESENCrr,         X86::VAESENCrm,           TB_ALIGN_16 },
1248263509Sdim
1249263509Sdim    // SHA foldable instructions
1250263509Sdim    { X86::SHA1MSG1rr,        X86::SHA1MSG1rm,          TB_ALIGN_16 },
1251263509Sdim    { X86::SHA1MSG2rr,        X86::SHA1MSG2rm,          TB_ALIGN_16 },
1252263509Sdim    { X86::SHA1NEXTErr,       X86::SHA1NEXTErm,         TB_ALIGN_16 },
1253263509Sdim    { X86::SHA1RNDS4rri,      X86::SHA1RNDS4rmi,        TB_ALIGN_16 },
1254263509Sdim    { X86::SHA256MSG1rr,      X86::SHA256MSG1rm,        TB_ALIGN_16 },
1255263509Sdim    { X86::SHA256MSG2rr,      X86::SHA256MSG2rm,        TB_ALIGN_16 },
1256263509Sdim    { X86::SHA256RNDS2rr,     X86::SHA256RNDS2rm,       TB_ALIGN_16 },
1257193323Sed  };
1258193323Sed
1259193323Sed  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
1260235633Sdim    unsigned RegOp = OpTbl2[i].RegOp;
1261235633Sdim    unsigned MemOp = OpTbl2[i].MemOp;
1262235633Sdim    unsigned Flags = OpTbl2[i].Flags;
1263226890Sdim    AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
1264226890Sdim                  RegOp, MemOp,
1265226890Sdim                  // Index 2, folded load
1266226890Sdim                  Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
1267226890Sdim  }
1268245431Sdim
1269245431Sdim  static const X86OpTblEntry OpTbl3[] = {
1270245431Sdim    // FMA foldable instructions
1271245431Sdim    { X86::VFMADDSSr231r,         X86::VFMADDSSr231m,         0 },
1272245431Sdim    { X86::VFMADDSDr231r,         X86::VFMADDSDr231m,         0 },
1273245431Sdim    { X86::VFMADDSSr132r,         X86::VFMADDSSr132m,         0 },
1274245431Sdim    { X86::VFMADDSDr132r,         X86::VFMADDSDr132m,         0 },
1275245431Sdim    { X86::VFMADDSSr213r,         X86::VFMADDSSr213m,         0 },
1276245431Sdim    { X86::VFMADDSDr213r,         X86::VFMADDSDr213m,         0 },
1277245431Sdim    { X86::VFMADDSSr213r_Int,     X86::VFMADDSSr213m_Int,     0 },
1278245431Sdim    { X86::VFMADDSDr213r_Int,     X86::VFMADDSDr213m_Int,     0 },
1279245431Sdim
1280245431Sdim    { X86::VFMADDPSr231r,         X86::VFMADDPSr231m,         TB_ALIGN_16 },
1281245431Sdim    { X86::VFMADDPDr231r,         X86::VFMADDPDr231m,         TB_ALIGN_16 },
1282245431Sdim    { X86::VFMADDPSr132r,         X86::VFMADDPSr132m,         TB_ALIGN_16 },
1283245431Sdim    { X86::VFMADDPDr132r,         X86::VFMADDPDr132m,         TB_ALIGN_16 },
1284245431Sdim    { X86::VFMADDPSr213r,         X86::VFMADDPSr213m,         TB_ALIGN_16 },
1285245431Sdim    { X86::VFMADDPDr213r,         X86::VFMADDPDr213m,         TB_ALIGN_16 },
1286245431Sdim    { X86::VFMADDPSr231rY,        X86::VFMADDPSr231mY,        TB_ALIGN_32 },
1287245431Sdim    { X86::VFMADDPDr231rY,        X86::VFMADDPDr231mY,        TB_ALIGN_32 },
1288245431Sdim    { X86::VFMADDPSr132rY,        X86::VFMADDPSr132mY,        TB_ALIGN_32 },
1289245431Sdim    { X86::VFMADDPDr132rY,        X86::VFMADDPDr132mY,        TB_ALIGN_32 },
1290245431Sdim    { X86::VFMADDPSr213rY,        X86::VFMADDPSr213mY,        TB_ALIGN_32 },
1291245431Sdim    { X86::VFMADDPDr213rY,        X86::VFMADDPDr213mY,        TB_ALIGN_32 },
1292245431Sdim
1293245431Sdim    { X86::VFNMADDSSr231r,        X86::VFNMADDSSr231m,        0 },
1294245431Sdim    { X86::VFNMADDSDr231r,        X86::VFNMADDSDr231m,        0 },
1295245431Sdim    { X86::VFNMADDSSr132r,        X86::VFNMADDSSr132m,        0 },
1296245431Sdim    { X86::VFNMADDSDr132r,        X86::VFNMADDSDr132m,        0 },
1297245431Sdim    { X86::VFNMADDSSr213r,        X86::VFNMADDSSr213m,        0 },
1298245431Sdim    { X86::VFNMADDSDr213r,        X86::VFNMADDSDr213m,        0 },
1299245431Sdim    { X86::VFNMADDSSr213r_Int,    X86::VFNMADDSSr213m_Int,    0 },
1300245431Sdim    { X86::VFNMADDSDr213r_Int,    X86::VFNMADDSDr213m_Int,    0 },
1301245431Sdim
1302245431Sdim    { X86::VFNMADDPSr231r,        X86::VFNMADDPSr231m,        TB_ALIGN_16 },
1303245431Sdim    { X86::VFNMADDPDr231r,        X86::VFNMADDPDr231m,        TB_ALIGN_16 },
1304245431Sdim    { X86::VFNMADDPSr132r,        X86::VFNMADDPSr132m,        TB_ALIGN_16 },
1305245431Sdim    { X86::VFNMADDPDr132r,        X86::VFNMADDPDr132m,        TB_ALIGN_16 },
1306245431Sdim    { X86::VFNMADDPSr213r,        X86::VFNMADDPSr213m,        TB_ALIGN_16 },
1307245431Sdim    { X86::VFNMADDPDr213r,        X86::VFNMADDPDr213m,        TB_ALIGN_16 },
1308245431Sdim    { X86::VFNMADDPSr231rY,       X86::VFNMADDPSr231mY,       TB_ALIGN_32 },
1309245431Sdim    { X86::VFNMADDPDr231rY,       X86::VFNMADDPDr231mY,       TB_ALIGN_32 },
1310245431Sdim    { X86::VFNMADDPSr132rY,       X86::VFNMADDPSr132mY,       TB_ALIGN_32 },
1311245431Sdim    { X86::VFNMADDPDr132rY,       X86::VFNMADDPDr132mY,       TB_ALIGN_32 },
1312245431Sdim    { X86::VFNMADDPSr213rY,       X86::VFNMADDPSr213mY,       TB_ALIGN_32 },
1313245431Sdim    { X86::VFNMADDPDr213rY,       X86::VFNMADDPDr213mY,       TB_ALIGN_32 },
1314245431Sdim
1315245431Sdim    { X86::VFMSUBSSr231r,         X86::VFMSUBSSr231m,         0 },
1316245431Sdim    { X86::VFMSUBSDr231r,         X86::VFMSUBSDr231m,         0 },
1317245431Sdim    { X86::VFMSUBSSr132r,         X86::VFMSUBSSr132m,         0 },
1318245431Sdim    { X86::VFMSUBSDr132r,         X86::VFMSUBSDr132m,         0 },
1319245431Sdim    { X86::VFMSUBSSr213r,         X86::VFMSUBSSr213m,         0 },
1320245431Sdim    { X86::VFMSUBSDr213r,         X86::VFMSUBSDr213m,         0 },
1321245431Sdim    { X86::VFMSUBSSr213r_Int,     X86::VFMSUBSSr213m_Int,     0 },
1322245431Sdim    { X86::VFMSUBSDr213r_Int,     X86::VFMSUBSDr213m_Int,     0 },
1323245431Sdim
1324245431Sdim    { X86::VFMSUBPSr231r,         X86::VFMSUBPSr231m,         TB_ALIGN_16 },
1325245431Sdim    { X86::VFMSUBPDr231r,         X86::VFMSUBPDr231m,         TB_ALIGN_16 },
1326245431Sdim    { X86::VFMSUBPSr132r,         X86::VFMSUBPSr132m,         TB_ALIGN_16 },
1327245431Sdim    { X86::VFMSUBPDr132r,         X86::VFMSUBPDr132m,         TB_ALIGN_16 },
1328245431Sdim    { X86::VFMSUBPSr213r,         X86::VFMSUBPSr213m,         TB_ALIGN_16 },
1329245431Sdim    { X86::VFMSUBPDr213r,         X86::VFMSUBPDr213m,         TB_ALIGN_16 },
1330245431Sdim    { X86::VFMSUBPSr231rY,        X86::VFMSUBPSr231mY,        TB_ALIGN_32 },
1331245431Sdim    { X86::VFMSUBPDr231rY,        X86::VFMSUBPDr231mY,        TB_ALIGN_32 },
1332245431Sdim    { X86::VFMSUBPSr132rY,        X86::VFMSUBPSr132mY,        TB_ALIGN_32 },
1333245431Sdim    { X86::VFMSUBPDr132rY,        X86::VFMSUBPDr132mY,        TB_ALIGN_32 },
1334245431Sdim    { X86::VFMSUBPSr213rY,        X86::VFMSUBPSr213mY,        TB_ALIGN_32 },
1335245431Sdim    { X86::VFMSUBPDr213rY,        X86::VFMSUBPDr213mY,        TB_ALIGN_32 },
1336245431Sdim
1337245431Sdim    { X86::VFNMSUBSSr231r,        X86::VFNMSUBSSr231m,        0 },
1338245431Sdim    { X86::VFNMSUBSDr231r,        X86::VFNMSUBSDr231m,        0 },
1339245431Sdim    { X86::VFNMSUBSSr132r,        X86::VFNMSUBSSr132m,        0 },
1340245431Sdim    { X86::VFNMSUBSDr132r,        X86::VFNMSUBSDr132m,        0 },
1341245431Sdim    { X86::VFNMSUBSSr213r,        X86::VFNMSUBSSr213m,        0 },
1342245431Sdim    { X86::VFNMSUBSDr213r,        X86::VFNMSUBSDr213m,        0 },
1343245431Sdim    { X86::VFNMSUBSSr213r_Int,    X86::VFNMSUBSSr213m_Int,    0 },
1344245431Sdim    { X86::VFNMSUBSDr213r_Int,    X86::VFNMSUBSDr213m_Int,    0 },
1345245431Sdim
1346245431Sdim    { X86::VFNMSUBPSr231r,        X86::VFNMSUBPSr231m,        TB_ALIGN_16 },
1347245431Sdim    { X86::VFNMSUBPDr231r,        X86::VFNMSUBPDr231m,        TB_ALIGN_16 },
1348245431Sdim    { X86::VFNMSUBPSr132r,        X86::VFNMSUBPSr132m,        TB_ALIGN_16 },
1349245431Sdim    { X86::VFNMSUBPDr132r,        X86::VFNMSUBPDr132m,        TB_ALIGN_16 },
1350245431Sdim    { X86::VFNMSUBPSr213r,        X86::VFNMSUBPSr213m,        TB_ALIGN_16 },
1351245431Sdim    { X86::VFNMSUBPDr213r,        X86::VFNMSUBPDr213m,        TB_ALIGN_16 },
1352245431Sdim    { X86::VFNMSUBPSr231rY,       X86::VFNMSUBPSr231mY,       TB_ALIGN_32 },
1353245431Sdim    { X86::VFNMSUBPDr231rY,       X86::VFNMSUBPDr231mY,       TB_ALIGN_32 },
1354245431Sdim    { X86::VFNMSUBPSr132rY,       X86::VFNMSUBPSr132mY,       TB_ALIGN_32 },
1355245431Sdim    { X86::VFNMSUBPDr132rY,       X86::VFNMSUBPDr132mY,       TB_ALIGN_32 },
1356245431Sdim    { X86::VFNMSUBPSr213rY,       X86::VFNMSUBPSr213mY,       TB_ALIGN_32 },
1357245431Sdim    { X86::VFNMSUBPDr213rY,       X86::VFNMSUBPDr213mY,       TB_ALIGN_32 },
1358245431Sdim
1359245431Sdim    { X86::VFMADDSUBPSr231r,      X86::VFMADDSUBPSr231m,      TB_ALIGN_16 },
1360245431Sdim    { X86::VFMADDSUBPDr231r,      X86::VFMADDSUBPDr231m,      TB_ALIGN_16 },
1361245431Sdim    { X86::VFMADDSUBPSr132r,      X86::VFMADDSUBPSr132m,      TB_ALIGN_16 },
1362245431Sdim    { X86::VFMADDSUBPDr132r,      X86::VFMADDSUBPDr132m,      TB_ALIGN_16 },
1363245431Sdim    { X86::VFMADDSUBPSr213r,      X86::VFMADDSUBPSr213m,      TB_ALIGN_16 },
1364245431Sdim    { X86::VFMADDSUBPDr213r,      X86::VFMADDSUBPDr213m,      TB_ALIGN_16 },
1365245431Sdim    { X86::VFMADDSUBPSr231rY,     X86::VFMADDSUBPSr231mY,     TB_ALIGN_32 },
1366245431Sdim    { X86::VFMADDSUBPDr231rY,     X86::VFMADDSUBPDr231mY,     TB_ALIGN_32 },
1367245431Sdim    { X86::VFMADDSUBPSr132rY,     X86::VFMADDSUBPSr132mY,     TB_ALIGN_32 },
1368245431Sdim    { X86::VFMADDSUBPDr132rY,     X86::VFMADDSUBPDr132mY,     TB_ALIGN_32 },
1369245431Sdim    { X86::VFMADDSUBPSr213rY,     X86::VFMADDSUBPSr213mY,     TB_ALIGN_32 },
1370245431Sdim    { X86::VFMADDSUBPDr213rY,     X86::VFMADDSUBPDr213mY,     TB_ALIGN_32 },
1371245431Sdim
1372245431Sdim    { X86::VFMSUBADDPSr231r,      X86::VFMSUBADDPSr231m,      TB_ALIGN_16 },
1373245431Sdim    { X86::VFMSUBADDPDr231r,      X86::VFMSUBADDPDr231m,      TB_ALIGN_16 },
1374245431Sdim    { X86::VFMSUBADDPSr132r,      X86::VFMSUBADDPSr132m,      TB_ALIGN_16 },
1375245431Sdim    { X86::VFMSUBADDPDr132r,      X86::VFMSUBADDPDr132m,      TB_ALIGN_16 },
1376245431Sdim    { X86::VFMSUBADDPSr213r,      X86::VFMSUBADDPSr213m,      TB_ALIGN_16 },
1377245431Sdim    { X86::VFMSUBADDPDr213r,      X86::VFMSUBADDPDr213m,      TB_ALIGN_16 },
1378245431Sdim    { X86::VFMSUBADDPSr231rY,     X86::VFMSUBADDPSr231mY,     TB_ALIGN_32 },
1379245431Sdim    { X86::VFMSUBADDPDr231rY,     X86::VFMSUBADDPDr231mY,     TB_ALIGN_32 },
1380245431Sdim    { X86::VFMSUBADDPSr132rY,     X86::VFMSUBADDPSr132mY,     TB_ALIGN_32 },
1381245431Sdim    { X86::VFMSUBADDPDr132rY,     X86::VFMSUBADDPDr132mY,     TB_ALIGN_32 },
1382245431Sdim    { X86::VFMSUBADDPSr213rY,     X86::VFMSUBADDPSr213mY,     TB_ALIGN_32 },
1383245431Sdim    { X86::VFMSUBADDPDr213rY,     X86::VFMSUBADDPDr213mY,     TB_ALIGN_32 },
1384245431Sdim
1385245431Sdim    // FMA4 foldable patterns
1386245431Sdim    { X86::VFMADDSS4rr,           X86::VFMADDSS4rm,           0           },
1387245431Sdim    { X86::VFMADDSD4rr,           X86::VFMADDSD4rm,           0           },
1388245431Sdim    { X86::VFMADDPS4rr,           X86::VFMADDPS4rm,           TB_ALIGN_16 },
1389245431Sdim    { X86::VFMADDPD4rr,           X86::VFMADDPD4rm,           TB_ALIGN_16 },
1390245431Sdim    { X86::VFMADDPS4rrY,          X86::VFMADDPS4rmY,          TB_ALIGN_32 },
1391245431Sdim    { X86::VFMADDPD4rrY,          X86::VFMADDPD4rmY,          TB_ALIGN_32 },
1392245431Sdim    { X86::VFNMADDSS4rr,          X86::VFNMADDSS4rm,          0           },
1393245431Sdim    { X86::VFNMADDSD4rr,          X86::VFNMADDSD4rm,          0           },
1394245431Sdim    { X86::VFNMADDPS4rr,          X86::VFNMADDPS4rm,          TB_ALIGN_16 },
1395245431Sdim    { X86::VFNMADDPD4rr,          X86::VFNMADDPD4rm,          TB_ALIGN_16 },
1396245431Sdim    { X86::VFNMADDPS4rrY,         X86::VFNMADDPS4rmY,         TB_ALIGN_32 },
1397245431Sdim    { X86::VFNMADDPD4rrY,         X86::VFNMADDPD4rmY,         TB_ALIGN_32 },
1398245431Sdim    { X86::VFMSUBSS4rr,           X86::VFMSUBSS4rm,           0           },
1399245431Sdim    { X86::VFMSUBSD4rr,           X86::VFMSUBSD4rm,           0           },
1400245431Sdim    { X86::VFMSUBPS4rr,           X86::VFMSUBPS4rm,           TB_ALIGN_16 },
1401245431Sdim    { X86::VFMSUBPD4rr,           X86::VFMSUBPD4rm,           TB_ALIGN_16 },
1402245431Sdim    { X86::VFMSUBPS4rrY,          X86::VFMSUBPS4rmY,          TB_ALIGN_32 },
1403245431Sdim    { X86::VFMSUBPD4rrY,          X86::VFMSUBPD4rmY,          TB_ALIGN_32 },
1404245431Sdim    { X86::VFNMSUBSS4rr,          X86::VFNMSUBSS4rm,          0           },
1405245431Sdim    { X86::VFNMSUBSD4rr,          X86::VFNMSUBSD4rm,          0           },
1406245431Sdim    { X86::VFNMSUBPS4rr,          X86::VFNMSUBPS4rm,          TB_ALIGN_16 },
1407245431Sdim    { X86::VFNMSUBPD4rr,          X86::VFNMSUBPD4rm,          TB_ALIGN_16 },
1408245431Sdim    { X86::VFNMSUBPS4rrY,         X86::VFNMSUBPS4rmY,         TB_ALIGN_32 },
1409245431Sdim    { X86::VFNMSUBPD4rrY,         X86::VFNMSUBPD4rmY,         TB_ALIGN_32 },
1410245431Sdim    { X86::VFMADDSUBPS4rr,        X86::VFMADDSUBPS4rm,        TB_ALIGN_16 },
1411245431Sdim    { X86::VFMADDSUBPD4rr,        X86::VFMADDSUBPD4rm,        TB_ALIGN_16 },
1412245431Sdim    { X86::VFMADDSUBPS4rrY,       X86::VFMADDSUBPS4rmY,       TB_ALIGN_32 },
1413245431Sdim    { X86::VFMADDSUBPD4rrY,       X86::VFMADDSUBPD4rmY,       TB_ALIGN_32 },
1414245431Sdim    { X86::VFMSUBADDPS4rr,        X86::VFMSUBADDPS4rm,        TB_ALIGN_16 },
1415245431Sdim    { X86::VFMSUBADDPD4rr,        X86::VFMSUBADDPD4rm,        TB_ALIGN_16 },
1416245431Sdim    { X86::VFMSUBADDPS4rrY,       X86::VFMSUBADDPS4rmY,       TB_ALIGN_32 },
1417245431Sdim    { X86::VFMSUBADDPD4rrY,       X86::VFMSUBADDPD4rmY,       TB_ALIGN_32 },
1418263509Sdim    // AVX-512 VPERMI instructions with 3 source operands.
1419263509Sdim    { X86::VPERMI2Drr,            X86::VPERMI2Drm,            0 },
1420263509Sdim    { X86::VPERMI2Qrr,            X86::VPERMI2Qrm,            0 },
1421263509Sdim    { X86::VPERMI2PSrr,           X86::VPERMI2PSrm,           0 },
1422263509Sdim    { X86::VPERMI2PDrr,           X86::VPERMI2PDrm,           0 },
1423245431Sdim  };
1424245431Sdim
1425245431Sdim  for (unsigned i = 0, e = array_lengthof(OpTbl3); i != e; ++i) {
1426245431Sdim    unsigned RegOp = OpTbl3[i].RegOp;
1427245431Sdim    unsigned MemOp = OpTbl3[i].MemOp;
1428245431Sdim    unsigned Flags = OpTbl3[i].Flags;
1429245431Sdim    AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
1430245431Sdim                  RegOp, MemOp,
1431245431Sdim                  // Index 3, folded load
1432245431Sdim                  Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
1433245431Sdim  }
1434245431Sdim
1435226890Sdim}
1436218893Sdim
1437226890Sdimvoid
1438226890SdimX86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
1439226890Sdim                            MemOp2RegOpTableType &M2RTable,
1440226890Sdim                            unsigned RegOp, unsigned MemOp, unsigned Flags) {
1441226890Sdim    if ((Flags & TB_NO_FORWARD) == 0) {
1442226890Sdim      assert(!R2MTable.count(RegOp) && "Duplicate entry!");
1443226890Sdim      R2MTable[RegOp] = std::make_pair(MemOp, Flags);
1444226890Sdim    }
1445226890Sdim    if ((Flags & TB_NO_REVERSE) == 0) {
1446226890Sdim      assert(!M2RTable.count(MemOp) &&
1447218893Sdim           "Duplicated entries in unfolding maps?");
1448226890Sdim      M2RTable[MemOp] = std::make_pair(RegOp, Flags);
1449226890Sdim    }
1450193323Sed}
1451193323Sed
/// isCoalescableExtInstr - Return true if MI is a register-to-register
/// sign/zero extension that register coalescing can see through. On success,
/// SrcReg/DstReg are set to the source and destination registers and SubIdx
/// to the sub-register index of DstReg that SrcReg corresponds to.
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                    unsigned &SrcReg, unsigned &DstReg,
                                    unsigned &SubIdx) const {
  switch (MI.getOpcode()) {
  default: break;
  case X86::MOVSX16rr8:
  case X86::MOVZX16rr8:
  case X86::MOVSX32rr8:
  case X86::MOVZX32rr8:
  case X86::MOVSX64rr8:
    if (!TM.getSubtarget<X86Subtarget>().is64Bit())
      // It's not always legal to reference the low 8-bit of the larger
      // register in 32-bit mode.
      return false;
    // Intentional fall-through: in 64-bit mode the 8-bit extensions are
    // handled exactly like the 16/32-bit ones below.
  case X86::MOVSX32rr16:
  case X86::MOVZX32rr16:
  case X86::MOVSX64rr16:
  case X86::MOVSX64rr32: {
    if (MI.getOperand(0).getSubReg() || MI.getOperand(1).getSubReg())
      // Be conservative.
      return false;
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    // Map the opcode to the sub-register index of the destination that the
    // source register occupies.
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::MOVSX16rr8:
    case X86::MOVZX16rr8:
    case X86::MOVSX32rr8:
    case X86::MOVZX32rr8:
    case X86::MOVSX64rr8:
      SubIdx = X86::sub_8bit;
      break;
    case X86::MOVSX32rr16:
    case X86::MOVZX32rr16:
    case X86::MOVSX64rr16:
      SubIdx = X86::sub_16bit;
      break;
    case X86::MOVSX64rr32:
      SubIdx = X86::sub_32bit;
      break;
    }
    return true;
  }
  }
  return false;
}
1499202375Srdivacky
1500199481Srdivacky/// isFrameOperand - Return true and the FrameIndex if the specified
1501199481Srdivacky/// operand and follow operands form a reference to the stack frame.
1502199481Srdivackybool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
1503199481Srdivacky                                  int &FrameIndex) const {
1504199481Srdivacky  if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
1505199481Srdivacky      MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
1506199481Srdivacky      MI->getOperand(Op+1).getImm() == 1 &&
1507199481Srdivacky      MI->getOperand(Op+2).getReg() == 0 &&
1508199481Srdivacky      MI->getOperand(Op+3).getImm() == 0) {
1509199481Srdivacky    FrameIndex = MI->getOperand(Op).getIndex();
1510199481Srdivacky    return true;
1511199481Srdivacky  }
1512199481Srdivacky  return false;
1513199481Srdivacky}
1514199481Srdivacky
1515199481Srdivackystatic bool isFrameLoadOpcode(int Opcode) {
1516199481Srdivacky  switch (Opcode) {
1517235633Sdim  default:
1518235633Sdim    return false;
1519193323Sed  case X86::MOV8rm:
1520193323Sed  case X86::MOV16rm:
1521193323Sed  case X86::MOV32rm:
1522193323Sed  case X86::MOV64rm:
1523193323Sed  case X86::LD_Fp64m:
1524193323Sed  case X86::MOVSSrm:
1525193323Sed  case X86::MOVSDrm:
1526193323Sed  case X86::MOVAPSrm:
1527193323Sed  case X86::MOVAPDrm:
1528193323Sed  case X86::MOVDQArm:
1529226890Sdim  case X86::VMOVSSrm:
1530226890Sdim  case X86::VMOVSDrm:
1531226890Sdim  case X86::VMOVAPSrm:
1532226890Sdim  case X86::VMOVAPDrm:
1533226890Sdim  case X86::VMOVDQArm:
1534224145Sdim  case X86::VMOVAPSYrm:
1535224145Sdim  case X86::VMOVAPDYrm:
1536224145Sdim  case X86::VMOVDQAYrm:
1537193323Sed  case X86::MMX_MOVD64rm:
1538193323Sed  case X86::MMX_MOVQ64rm:
1539263509Sdim  case X86::VMOVDQA32rm:
1540263509Sdim  case X86::VMOVDQA64rm:
1541199481Srdivacky    return true;
1542193323Sed  }
1543193323Sed}
1544193323Sed
1545199481Srdivackystatic bool isFrameStoreOpcode(int Opcode) {
1546199481Srdivacky  switch (Opcode) {
1547193323Sed  default: break;
1548193323Sed  case X86::MOV8mr:
1549193323Sed  case X86::MOV16mr:
1550193323Sed  case X86::MOV32mr:
1551193323Sed  case X86::MOV64mr:
1552193323Sed  case X86::ST_FpP64m:
1553193323Sed  case X86::MOVSSmr:
1554193323Sed  case X86::MOVSDmr:
1555193323Sed  case X86::MOVAPSmr:
1556193323Sed  case X86::MOVAPDmr:
1557193323Sed  case X86::MOVDQAmr:
1558226890Sdim  case X86::VMOVSSmr:
1559226890Sdim  case X86::VMOVSDmr:
1560226890Sdim  case X86::VMOVAPSmr:
1561226890Sdim  case X86::VMOVAPDmr:
1562226890Sdim  case X86::VMOVDQAmr:
1563224145Sdim  case X86::VMOVAPSYmr:
1564224145Sdim  case X86::VMOVAPDYmr:
1565224145Sdim  case X86::VMOVDQAYmr:
1566193323Sed  case X86::MMX_MOVD64mr:
1567193323Sed  case X86::MMX_MOVQ64mr:
1568193323Sed  case X86::MMX_MOVNTQmr:
1569199481Srdivacky    return true;
1570199481Srdivacky  }
1571199481Srdivacky  return false;
1572199481Srdivacky}
1573199481Srdivacky
1574218893Sdimunsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
1575199481Srdivacky                                           int &FrameIndex) const {
1576199481Srdivacky  if (isFrameLoadOpcode(MI->getOpcode()))
1577212904Sdim    if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
1578199481Srdivacky      return MI->getOperand(0).getReg();
1579199481Srdivacky  return 0;
1580199481Srdivacky}
1581199481Srdivacky
1582218893Sdimunsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
1583199481Srdivacky                                                 int &FrameIndex) const {
1584199481Srdivacky  if (isFrameLoadOpcode(MI->getOpcode())) {
1585199481Srdivacky    unsigned Reg;
1586199481Srdivacky    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
1587199481Srdivacky      return Reg;
1588199481Srdivacky    // Check for post-frame index elimination operations
1589200581Srdivacky    const MachineMemOperand *Dummy;
1590200581Srdivacky    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
1591199481Srdivacky  }
1592199481Srdivacky  return 0;
1593199481Srdivacky}
1594199481Srdivacky
1595199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
1596199481Srdivacky                                          int &FrameIndex) const {
1597199481Srdivacky  if (isFrameStoreOpcode(MI->getOpcode()))
1598212904Sdim    if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
1599212904Sdim        isFrameOperand(MI, 0, FrameIndex))
1600210299Sed      return MI->getOperand(X86::AddrNumOperands).getReg();
1601199481Srdivacky  return 0;
1602199481Srdivacky}
1603199481Srdivacky
1604199481Srdivackyunsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
1605199481Srdivacky                                                int &FrameIndex) const {
1606199481Srdivacky  if (isFrameStoreOpcode(MI->getOpcode())) {
1607199481Srdivacky    unsigned Reg;
1608199481Srdivacky    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
1609199481Srdivacky      return Reg;
1610199481Srdivacky    // Check for post-frame index elimination operations
1611200581Srdivacky    const MachineMemOperand *Dummy;
1612200581Srdivacky    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
1613193323Sed  }
1614193323Sed  return 0;
1615193323Sed}
1616193323Sed
/// regIsPICBase - Return true if register is a PIC base, i.e. it is defined
/// (only) by X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  // Don't waste compile time scanning use-def chains of physregs.
  if (!TargetRegisterInfo::isVirtualRegister(BaseReg))
    return false;
  bool isPICBase = false;
  // Every definition of BaseReg must be a MOVPC32r; any other defining
  // instruction disqualifies it.
  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
         E = MRI.def_end(); I != E; ++I) {
    MachineInstr *DefMI = I.getOperand().getParent();
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    // A virtual register should have at most one MOVPC32r def here.
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  // False if BaseReg had no definitions at all.
  return isPICBase;
}
1634193323Sed
/// isReallyTriviallyReMaterializable - Return true if MI may be freely
/// re-executed at another program point. Instructions marked rematerializable
/// in the .td files are trusted by default; loads and LEAs additionally need
/// their address operands proven invariant here.
bool
X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MOVDQUrm:
  case X86::VMOVSSrm:
  case X86::VMOVSDrm:
  case X86::VMOVAPSrm:
  case X86::VMOVUPSrm:
  case X86::VMOVAPDrm:
  case X86::VMOVDQArm:
  case X86::VMOVDQUrm:
  case X86::VMOVAPSYrm:
  case X86::VMOVUPSYrm:
  case X86::VMOVAPDYrm:
  case X86::VMOVDQAYrm:
  case X86::VMOVDQUYrm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::FsVMOVAPSrm:
  case X86::FsVMOVAPDrm:
  case X86::FsMOVAPSrm:
  case X86::FsMOVAPDrm: {
    // Loads from constant pools are trivially rematerializable.
    // Operand indices 1..4 here are the address components of the load
    // (base reg, scale imm, index reg, displacement); the load must have no
    // index register and be known invariant.
    if (MI->getOperand(1).isReg() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        MI->isInvariantLoad(AA)) {
      unsigned BaseReg = MI->getOperand(1).getReg();
      // No base register, or RIP-relative addressing, is position-independent
      // by construction.
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
        return false;
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      // Safe only if the base register is the (invariant) PIC base.
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    // An LEA is rematerializable when its address uses no index register and
    // no segment, and its base is either absent, a frame index / global, or
    // the PIC base.
    if (MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        !MI->getOperand(4).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
      if (!MI->getOperand(1).isReg())
        return true;
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }

  // All other instructions marked M_REMATERIALIZABLE are always trivially
  // rematerializable.
  return true;
}
1712193323Sed
/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
/// that would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator E = MBB.end();

  // For compile time consideration, if we are not able to determine the
  // safety after visiting 4 instructions in each direction, we will assume
  // it's not safe.
  //
  // First scan forward from I: if EFLAGS is redefined before being read,
  // clobbering it at I is safe.
  MachineBasicBlock::iterator Iter = I;
  for (unsigned i = 0; Iter != E && i < 4; ++i) {
    bool SeenDef = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      // A register mask (e.g. from a call) that covers EFLAGS counts as a
      // def: the flags are dead past this instruction.
      if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
        SeenDef = true;
      if (!MO.isReg())
        continue;
      if (MO.getReg() == X86::EFLAGS) {
        // A use of EFLAGS before any redefinition means the flags are live.
        if (MO.isUse())
          return false;
        SeenDef = true;
      }
    }

    if (SeenDef)
      // This instruction defines EFLAGS, no need to look any further.
      return true;
    ++Iter;
    // Skip over DBG_VALUE.
    while (Iter != E && Iter->isDebugValue())
      ++Iter;
  }

  // It is safe to clobber EFLAGS at the end of a block if no successor has it
  // live in.
  if (Iter == E) {
    for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
           SE = MBB.succ_end(); SI != SE; ++SI)
      if ((*SI)->isLiveIn(X86::EFLAGS))
        return false;
    return true;
  }

  // The forward scan was inconclusive; scan backward from I to see whether
  // EFLAGS is already dead or killed before I.
  MachineBasicBlock::iterator B = MBB.begin();
  Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    // If we make it to the beginning of the block, it's safe to clobber
    // EFLAGS iff EFLAGS is not live-in.
    if (Iter == B)
      return !MBB.isLiveIn(X86::EFLAGS);

    --Iter;
    // Skip over DBG_VALUE.
    while (Iter != B && Iter->isDebugValue())
      --Iter;

    bool SawKill = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      // A register mask may clobber EFLAGS, but we should still look for a
      // live EFLAGS def.
      if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
        SawKill = true;
      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
        // A def before I: safe iff that def is already dead.
        if (MO.isDef()) return MO.isDead();
        if (MO.isKill()) SawKill = true;
      }
    }

    if (SawKill)
      // This instruction kills EFLAGS and doesn't redefine it, so
      // there's no need to look further.
      return true;
  }

  // Conservative answer.
  return false;
}
1794193323Sed
/// reMaterialize - Emit a copy of Orig immediately before I in MBB, with its
/// destination rewritten to DestReg (at sub-register index SubIdx).
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr *Orig,
                                 const TargetRegisterInfo &TRI) const {
  // MOV32r0 is implemented with a xor which clobbers condition code.
  // Re-materialize it as movri instructions to avoid side effects.
  unsigned Opc = Orig->getOpcode();
  if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
    DebugLoc DL = Orig->getDebugLoc();
    BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
      .addImm(0);
  } else {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MBB.insert(I, MI);
  }

  // Retarget whichever instruction was just inserted (the one before I) at
  // DestReg:SubIdx.
  MachineInstr *NewMI = prior(I);
  NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
}
1815193323Sed
1816193323Sed/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
1817193323Sed/// is not marked dead.
1818193323Sedstatic bool hasLiveCondCodeDef(MachineInstr *MI) {
1819193323Sed  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1820193323Sed    MachineOperand &MO = MI->getOperand(i);
1821193323Sed    if (MO.isReg() && MO.isDef() &&
1822193323Sed        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
1823193323Sed      return true;
1824193323Sed    }
1825193323Sed  }
1826193323Sed  return false;
1827193323Sed}
1828193323Sed
/// getTruncatedShiftCount - Return the immediate shift count of MI (found at
/// operand index ShiftAmtOperandIdx), truncated the same way the hardware
/// truncates it: masked to six bits for 64-bit (REX.W) shifts and to five
/// bits otherwise.
inline static unsigned getTruncatedShiftCount(MachineInstr *MI,
                                              unsigned ShiftAmtOperandIdx) {
  // The shift count is six bits with the REX.W prefix and five bits without.
  unsigned ShiftCountMask = (MI->getDesc().TSFlags & X86II::REX_W) ? 63 : 31;
  unsigned Imm = MI->getOperand(ShiftAmtOperandIdx).getImm();
  return Imm & ShiftCountMask;
}
1838263509Sdim
/// isTruncatedShiftCountForLEA - Check whether the given (already truncated)
/// shift count can be represented by a LEA instruction's scale factor.
inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
  // A left shift can become a LEA only if the amount fits the two-bit
  // SIB.scale field, i.e. scale factors 1/2/4/8, which correspond to shift
  // amounts 1 through 3. A shift of zero needs no transformation at all.
  return ShAmt > 0 && ShAmt < 4;
}
1849263509Sdim
/// classifyLEAReg - Given Src, an operand that will feed an LEA of kind Opc,
/// compute a register (NewSrc) and liveness flags (isKill, isUndef) that can
/// legally appear in the LEA's address. For LEA64_32r — where the incoming
/// operand is 32-bit but the address computation needs a 64-bit register —
/// ImplicitOp may additionally be filled in with an implicit use of the
/// original register. Returns false if no suitable register can be produced
/// and the LEA transformation should be abandoned.
bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src,
                                  unsigned Opc, bool AllowSP,
                                  unsigned &NewSrc, bool &isKill, bool &isUndef,
                                  MachineOperand &ImplicitOp) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  // Pick the register class matching the LEA width, optionally excluding SP.
  const TargetRegisterClass *RC;
  if (AllowSP) {
    RC = Opc != X86::LEA32r ? &X86::GR64RegClass : &X86::GR32RegClass;
  } else {
    RC = Opc != X86::LEA32r ?
      &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
  }
  unsigned SrcReg = Src.getReg();

  // For both LEA64 and LEA32 the register already has essentially the right
  // type (32-bit or 64-bit) we may just need to forbid SP.
  if (Opc != X86::LEA64_32r) {
    NewSrc = SrcReg;
    isKill = Src.isKill();
    isUndef = Src.isUndef();

    if (TargetRegisterInfo::isVirtualRegister(NewSrc) &&
        !MF.getRegInfo().constrainRegClass(NewSrc, RC))
      return false;

    return true;
  }

  // This is for an LEA64_32r and incoming registers are 32-bit. One way or
  // another we need to add 64-bit registers to the final MI.
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
    ImplicitOp = Src;
    ImplicitOp.setImplicit();

    // Use the 64-bit super-register and query its liveness to derive sane
    // kill/undef flags for it.
    NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64);
    MachineBasicBlock::LivenessQueryResult LQR =
      MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI);

    switch (LQR) {
    case MachineBasicBlock::LQR_Unknown:
      // We can't give sane liveness flags to the instruction, abandon LEA
      // formation.
      return false;
    case MachineBasicBlock::LQR_Live:
      isKill = MI->killsRegister(SrcReg);
      isUndef = false;
      break;
    default:
      // The physreg itself is dead, so we have to use it as an <undef>.
      isKill = false;
      isUndef = true;
      break;
    }
  } else {
    // Virtual register of the wrong class, we have to create a temporary 64-bit
    // vreg to feed into the LEA.
    NewSrc = MF.getRegInfo().createVirtualRegister(RC);
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
            get(TargetOpcode::COPY))
      .addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
        .addOperand(Src);

    // Which is obviously going to be dead after we're done with it.
    isKill = true;
    isUndef = false;
  }

  // We've set all the parameters without issue.
  return true;
}
1920263509Sdim
/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
/// to a 32-bit superregister and then truncating back down to a 16-bit
/// subregister.
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                           MachineFunction::iterator &MFI,
                                           MachineBasicBlock::iterator &MBBI,
                                           LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
  // In 64-bit mode use LEA64_32r (64-bit address computation, 32-bit result);
  // otherwise use LEA32r. The input vreg width must match.
  unsigned Opc, leaInReg;
  if (TM.getSubtarget<X86Subtarget>().is64Bit()) {
    Opc = X86::LEA64_32r;
    leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
  } else {
    Opc = X86::LEA32r;
    leaInReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
  }

  // Build and insert into an implicit UNDEF value. This is OK because
  // we'll be shifting and then extracting the lower 16-bits.
  // This has the potential to cause partial register stall. e.g.
  //   movw    (%rbp,%rcx,2), %dx
  //   leal    -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
  MachineInstr *InsMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
    .addReg(leaInReg, RegState::Define, X86::sub_16bit)
    .addReg(Src, getKillRegState(isKill));

  // Emit the LEA whose address expression encodes the original arithmetic.
  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                    get(Opc), leaOutReg);
  switch (MIOpc) {
  default: llvm_unreachable("Unreachable!");
  case X86::SHL16ri: {
    // shl $imm -> lea with scale (1 << imm).
    unsigned ShAmt = MI->getOperand(2).getImm();
    MIB.addReg(0).addImm(1 << ShAmt)
       .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
    break;
  }
  case X86::INC16r:
  case X86::INC64_16r:
    addRegOffset(MIB, leaInReg, true, 1);
    break;
  case X86::DEC16r:
  case X86::DEC64_16r:
    addRegOffset(MIB, leaInReg, true, -1);
    break;
  case X86::ADD16ri:
  case X86::ADD16ri8:
  case X86::ADD16ri_DB:
  case X86::ADD16ri8_DB:
    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
    break;
  case X86::ADD16rr:
  case X86::ADD16rr_DB: {
    // reg+reg needs the second operand promoted to LEA width as well.
    unsigned Src2 = MI->getOperand(2).getReg();
    bool isKill2 = MI->getOperand(2).isKill();
    unsigned leaInReg2 = 0;
    MachineInstr *InsMI2 = 0;
    if (Src == Src2) {
      // ADD16rr %reg1028<kill>, %reg1028
      // just a single insert_subreg.
      addRegReg(MIB, leaInReg, true, leaInReg, false);
    } else {
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
      else
        leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // we'll be shifting and then extracting the lower 16-bits.
      BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF),leaInReg2);
      InsMI2 =
        BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
        .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
        .addReg(Src2, getKillRegState(isKill2));
      addRegReg(MIB, leaInReg, true, leaInReg2, true);
    }
    if (LV && isKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, InsMI2);
    break;
  }
  }

  // Copy the low 16 bits of the LEA result into the original destination.
  MachineInstr *NewMI = MIB;
  MachineInstr *ExtMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
    .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);

  if (LV) {
    // Update live variables
    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
    if (isKill)
      LV->replaceKillInstruction(Src, MI, InsMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, ExtMI);
  }

  return ExtMI;
}
2032200581Srdivacky
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand.  This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables *LV) const {
  MachineInstr *MI = MBBI;

  // The following opcodes also sets the condition code register(s). Only
  // convert them to equivalent lea if the condition code register def's
  // are dead!
  if (hasLiveCondCodeDef(MI))
    return 0;

  MachineFunction &MF = *MI->getParent()->getParent();
  // All instructions input are two-addr instructions.  Get the known operands.
  const MachineOperand &Dest = MI->getOperand(0);
  const MachineOperand &Src = MI->getOperand(1);

  MachineInstr *NewMI = NULL;
  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
  // we have better subtarget support, enable the 16-bit LEA generation here.
  // 16-bit LEA is also slow on Core2.
  bool DisableLEA16 = true;
  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();

  // Each case below either builds a replacement instruction into NewMI (which
  // is inserted into the basic block at the bottom of this function) or
  // returns 0 to signal that this particular instruction can't be converted.
  unsigned MIOpc = MI->getOpcode();
  switch (MIOpc) {
  case X86::SHUFPSrri: {
    assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;

    // Only a shuffle whose two inputs are the same register reads a single
    // source, and only then can it be rewritten as a PSHUFD.
    unsigned B = MI->getOperand(1).getReg();
    unsigned C = MI->getOperand(2).getReg();
    if (B != C) return 0;
    unsigned M = MI->getOperand(3).getImm();
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
      .addOperand(Dest).addOperand(Src).addImm(M);
    break;
  }
  case X86::SHUFPDrri: {
    assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;

    // As above: both inputs must be the same register.
    unsigned B = MI->getOperand(1).getReg();
    unsigned C = MI->getOperand(2).getReg();
    if (B != C) return 0;
    unsigned M = MI->getOperand(3).getImm();

    // Convert to PSHUFD mask.
    M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6)| 0x44;

    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
      .addOperand(Dest).addOperand(Src).addImm(M);
    break;
  }
  case X86::SHL64ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // The shift is encoded as an LEA scale of (1 << ShAmt), so only shift
    // amounts accepted by isTruncatedShiftCountForLEA can be converted.
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;

    // LEA can't handle RSP.
    if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
        !MF.getRegInfo().constrainRegClass(Src.getReg(),
                                           &X86::GR64_NOSPRegClass))
      return 0;

    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
      .addOperand(Dest)
      .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;

    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;

    // LEA can't handle ESP.
    bool isKill, isUndef;
    unsigned SrcReg;
    MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
    if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
                        SrcReg, isKill, isUndef, ImplicitOp))
      return 0;

    MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
      .addOperand(Dest)
      .addReg(0).addImm(1 << ShAmt)
      .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
      .addImm(0).addReg(0);
    // classifyLEAReg may hand back an extra implicit operand to attach.
    if (ImplicitOp.getReg() != 0)
      MIB.addOperand(ImplicitOp);
    NewMI = MIB;

    break;
  }
  case X86::SHL16ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
    if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;

    // 16-bit LEA is disabled; on 64-bit targets fall back to synthesizing
    // the result via a 32-bit LEA with sub-register copies instead.
    if (DisableLEA16)
      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
      .addOperand(Dest)
      .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
    break;
  }
  default: {

    switch (MIOpc) {
    default: return 0;
    case X86::INC64r:
    case X86::INC32r:
    case X86::INC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      // INC becomes "lea dst, [src + 1]".
      unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      bool isKill, isUndef;
      unsigned SrcReg;
      MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
      if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
                          SrcReg, isKill, isUndef, ImplicitOp))
        return 0;

      MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
          .addOperand(Dest)
          .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
      if (ImplicitOp.getReg() != 0)
        MIB.addOperand(ImplicitOp);

      NewMI = addOffset(MIB, 1);
      break;
    }
    case X86::INC16r:
    case X86::INC64_16r:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addOperand(Dest).addOperand(Src), 1);
      break;
    case X86::DEC64r:
    case X86::DEC32r:
    case X86::DEC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      // DEC becomes "lea dst, [src - 1]".
      unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);

      bool isKill, isUndef;
      unsigned SrcReg;
      MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
      if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
                          SrcReg, isKill, isUndef, ImplicitOp))
        return 0;

      MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
          .addOperand(Dest)
          .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
      if (ImplicitOp.getReg() != 0)
        MIB.addOperand(ImplicitOp);

      NewMI = addOffset(MIB, -1);

      break;
    }
    case X86::DEC16r:
    case X86::DEC64_16r:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addOperand(Dest).addOperand(Src), -1);
      break;
    case X86::ADD64rr:
    case X86::ADD64rr_DB:
    case X86::ADD32rr:
    case X86::ADD32rr_DB: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      // Two-register ADD becomes "lea dst, [src1 + src2]".
      unsigned Opc;
      if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB)
        Opc = X86::LEA64r;
      else
        Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;

      // The first source may be the stack pointer (AllowSP: it can act as
      // the LEA base register), but the second source, used as the index,
      // may not.
      bool isKill, isUndef;
      unsigned SrcReg;
      MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
      if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
                          SrcReg, isKill, isUndef, ImplicitOp))
        return 0;

      const MachineOperand &Src2 = MI->getOperand(2);
      bool isKill2, isUndef2;
      unsigned SrcReg2;
      MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
      if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
                          SrcReg2, isKill2, isUndef2, ImplicitOp2))
        return 0;

      MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
        .addOperand(Dest);
      if (ImplicitOp.getReg() != 0)
        MIB.addOperand(ImplicitOp);
      if (ImplicitOp2.getReg() != 0)
        MIB.addOperand(ImplicitOp2);

      NewMI = addRegReg(MIB, SrcReg, isKill, SrcReg2, isKill2);

      // Preserve undefness of the operands.
      NewMI->getOperand(1).setIsUndef(isUndef);
      NewMI->getOperand(3).setIsUndef(isUndef2);

      if (LV && Src2.isKill())
        LV->replaceKillInstruction(SrcReg2, MI, NewMI);
      break;
    }
    case X86::ADD16rr:
    case X86::ADD16rr_DB: {
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addOperand(Dest),
                        Src.getReg(), Src.isKill(), Src2, isKill2);

      // Preserve undefness of the operands.
      bool isUndef = MI->getOperand(1).isUndef();
      bool isUndef2 = MI->getOperand(2).isUndef();
      NewMI->getOperand(1).setIsUndef(isUndef);
      NewMI->getOperand(3).setIsUndef(isUndef2);

      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD64ri32:
    case X86::ADD64ri8:
    case X86::ADD64ri32_DB:
    case X86::ADD64ri8_DB:
      // Immediate ADD becomes "lea dst, [src + imm]".
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                        .addOperand(Dest).addOperand(Src),
                        MI->getOperand(2).getImm());
      break;
    case X86::ADD32ri:
    case X86::ADD32ri8:
    case X86::ADD32ri_DB:
    case X86::ADD32ri8_DB: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;

      bool isKill, isUndef;
      unsigned SrcReg;
      MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
      if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
                          SrcReg, isKill, isUndef, ImplicitOp))
        return 0;

      MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
          .addOperand(Dest)
          .addReg(SrcReg, getUndefRegState(isUndef) | getKillRegState(isKill));
      if (ImplicitOp.getReg() != 0)
        MIB.addOperand(ImplicitOp);

      NewMI = addOffset(MIB, MI->getOperand(2).getImm());
      break;
    }
    case X86::ADD16ri:
    case X86::ADD16ri8:
    case X86::ADD16ri_DB:
    case X86::ADD16ri8_DB:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addOperand(Dest).addOperand(Src),
                        MI->getOperand(2).getImm());
      break;
    }
  }
  }

  if (!NewMI) return 0;

  if (LV) {  // Update live variables
    if (Src.isKill())
      LV->replaceKillInstruction(Src.getReg(), MI, NewMI);
    if (Dest.isDead())
      LV->replaceKillInstruction(Dest.getReg(), MI, NewMI);
  }

  MFI->insert(MBBI, NewMI);          // Insert the new inst
  return NewMI;
}
2340193323Sed
/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
///
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    // Swapping the two register operands of a double shift also requires
    // switching SHLD<->SHRD and replacing the shift amount I with Size-I,
    // per the identities in the case comments above.
    unsigned Opc;
    unsigned Size;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI->getOperand(3).getImm();
    if (NewMI) {
      // The caller asked for a fresh instruction: clone MI and mutate the
      // clone in place, clearing NewMI so the delegate below doesn't clone
      // a second time.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    MI->getOperand(3).setImm(Size-Amt);
    // Let the generic implementation perform the actual operand swap.
    return TargetInstrInfo::commuteInstruction(MI, NewMI);
  }
  case X86::CMOVB16rr:  case X86::CMOVB32rr:  case X86::CMOVB64rr:
  case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
  case X86::CMOVE16rr:  case X86::CMOVE32rr:  case X86::CMOVE64rr:
  case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
  case X86::CMOVBE16rr: case X86::CMOVBE32rr: case X86::CMOVBE64rr:
  case X86::CMOVA16rr:  case X86::CMOVA32rr:  case X86::CMOVA64rr:
  case X86::CMOVL16rr:  case X86::CMOVL32rr:  case X86::CMOVL64rr:
  case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
  case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
  case X86::CMOVG16rr:  case X86::CMOVG32rr:  case X86::CMOVG64rr:
  case X86::CMOVS16rr:  case X86::CMOVS32rr:  case X86::CMOVS64rr:
  case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
  case X86::CMOVP16rr:  case X86::CMOVP32rr:  case X86::CMOVP64rr:
  case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
  case X86::CMOVO16rr:  case X86::CMOVO32rr:  case X86::CMOVO64rr:
  case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: {
    // Swapping the operands of a CMOV selects the other value, so the
    // condition must be inverted to preserve the instruction's semantics.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      // Same cloning protocol as the double-shift case above.
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended.
    // The default case below performs the actual operand commutation.
  }
  default:
    return TargetInstrInfo::commuteInstruction(MI, NewMI);
  }
}
2454193323Sed
2455245431Sdimstatic X86::CondCode getCondFromBranchOpc(unsigned BrOpc) {
2456193323Sed  switch (BrOpc) {
2457193323Sed  default: return X86::COND_INVALID;
2458203954Srdivacky  case X86::JE_4:  return X86::COND_E;
2459203954Srdivacky  case X86::JNE_4: return X86::COND_NE;
2460203954Srdivacky  case X86::JL_4:  return X86::COND_L;
2461203954Srdivacky  case X86::JLE_4: return X86::COND_LE;
2462203954Srdivacky  case X86::JG_4:  return X86::COND_G;
2463203954Srdivacky  case X86::JGE_4: return X86::COND_GE;
2464203954Srdivacky  case X86::JB_4:  return X86::COND_B;
2465203954Srdivacky  case X86::JBE_4: return X86::COND_BE;
2466203954Srdivacky  case X86::JA_4:  return X86::COND_A;
2467203954Srdivacky  case X86::JAE_4: return X86::COND_AE;
2468203954Srdivacky  case X86::JS_4:  return X86::COND_S;
2469203954Srdivacky  case X86::JNS_4: return X86::COND_NS;
2470203954Srdivacky  case X86::JP_4:  return X86::COND_P;
2471203954Srdivacky  case X86::JNP_4: return X86::COND_NP;
2472203954Srdivacky  case X86::JO_4:  return X86::COND_O;
2473203954Srdivacky  case X86::JNO_4: return X86::COND_NO;
2474193323Sed  }
2475193323Sed}
2476193323Sed
2477245431Sdim/// getCondFromSETOpc - return condition code of a SET opcode.
2478245431Sdimstatic X86::CondCode getCondFromSETOpc(unsigned Opc) {
2479245431Sdim  switch (Opc) {
2480245431Sdim  default: return X86::COND_INVALID;
2481245431Sdim  case X86::SETAr:  case X86::SETAm:  return X86::COND_A;
2482245431Sdim  case X86::SETAEr: case X86::SETAEm: return X86::COND_AE;
2483245431Sdim  case X86::SETBr:  case X86::SETBm:  return X86::COND_B;
2484245431Sdim  case X86::SETBEr: case X86::SETBEm: return X86::COND_BE;
2485245431Sdim  case X86::SETEr:  case X86::SETEm:  return X86::COND_E;
2486245431Sdim  case X86::SETGr:  case X86::SETGm:  return X86::COND_G;
2487245431Sdim  case X86::SETGEr: case X86::SETGEm: return X86::COND_GE;
2488245431Sdim  case X86::SETLr:  case X86::SETLm:  return X86::COND_L;
2489245431Sdim  case X86::SETLEr: case X86::SETLEm: return X86::COND_LE;
2490245431Sdim  case X86::SETNEr: case X86::SETNEm: return X86::COND_NE;
2491245431Sdim  case X86::SETNOr: case X86::SETNOm: return X86::COND_NO;
2492245431Sdim  case X86::SETNPr: case X86::SETNPm: return X86::COND_NP;
2493245431Sdim  case X86::SETNSr: case X86::SETNSm: return X86::COND_NS;
2494245431Sdim  case X86::SETOr:  case X86::SETOm:  return X86::COND_O;
2495245431Sdim  case X86::SETPr:  case X86::SETPm:  return X86::COND_P;
2496245431Sdim  case X86::SETSr:  case X86::SETSm:  return X86::COND_S;
2497245431Sdim  }
2498245431Sdim}
2499245431Sdim
2500245431Sdim/// getCondFromCmovOpc - return condition code of a CMov opcode.
2501245431SdimX86::CondCode X86::getCondFromCMovOpc(unsigned Opc) {
2502245431Sdim  switch (Opc) {
2503245431Sdim  default: return X86::COND_INVALID;
2504245431Sdim  case X86::CMOVA16rm:  case X86::CMOVA16rr:  case X86::CMOVA32rm:
2505245431Sdim  case X86::CMOVA32rr:  case X86::CMOVA64rm:  case X86::CMOVA64rr:
2506245431Sdim    return X86::COND_A;
2507245431Sdim  case X86::CMOVAE16rm: case X86::CMOVAE16rr: case X86::CMOVAE32rm:
2508245431Sdim  case X86::CMOVAE32rr: case X86::CMOVAE64rm: case X86::CMOVAE64rr:
2509245431Sdim    return X86::COND_AE;
2510245431Sdim  case X86::CMOVB16rm:  case X86::CMOVB16rr:  case X86::CMOVB32rm:
2511245431Sdim  case X86::CMOVB32rr:  case X86::CMOVB64rm:  case X86::CMOVB64rr:
2512245431Sdim    return X86::COND_B;
2513245431Sdim  case X86::CMOVBE16rm: case X86::CMOVBE16rr: case X86::CMOVBE32rm:
2514245431Sdim  case X86::CMOVBE32rr: case X86::CMOVBE64rm: case X86::CMOVBE64rr:
2515245431Sdim    return X86::COND_BE;
2516245431Sdim  case X86::CMOVE16rm:  case X86::CMOVE16rr:  case X86::CMOVE32rm:
2517245431Sdim  case X86::CMOVE32rr:  case X86::CMOVE64rm:  case X86::CMOVE64rr:
2518245431Sdim    return X86::COND_E;
2519245431Sdim  case X86::CMOVG16rm:  case X86::CMOVG16rr:  case X86::CMOVG32rm:
2520245431Sdim  case X86::CMOVG32rr:  case X86::CMOVG64rm:  case X86::CMOVG64rr:
2521245431Sdim    return X86::COND_G;
2522245431Sdim  case X86::CMOVGE16rm: case X86::CMOVGE16rr: case X86::CMOVGE32rm:
2523245431Sdim  case X86::CMOVGE32rr: case X86::CMOVGE64rm: case X86::CMOVGE64rr:
2524245431Sdim    return X86::COND_GE;
2525245431Sdim  case X86::CMOVL16rm:  case X86::CMOVL16rr:  case X86::CMOVL32rm:
2526245431Sdim  case X86::CMOVL32rr:  case X86::CMOVL64rm:  case X86::CMOVL64rr:
2527245431Sdim    return X86::COND_L;
2528245431Sdim  case X86::CMOVLE16rm: case X86::CMOVLE16rr: case X86::CMOVLE32rm:
2529245431Sdim  case X86::CMOVLE32rr: case X86::CMOVLE64rm: case X86::CMOVLE64rr:
2530245431Sdim    return X86::COND_LE;
2531245431Sdim  case X86::CMOVNE16rm: case X86::CMOVNE16rr: case X86::CMOVNE32rm:
2532245431Sdim  case X86::CMOVNE32rr: case X86::CMOVNE64rm: case X86::CMOVNE64rr:
2533245431Sdim    return X86::COND_NE;
2534245431Sdim  case X86::CMOVNO16rm: case X86::CMOVNO16rr: case X86::CMOVNO32rm:
2535245431Sdim  case X86::CMOVNO32rr: case X86::CMOVNO64rm: case X86::CMOVNO64rr:
2536245431Sdim    return X86::COND_NO;
2537245431Sdim  case X86::CMOVNP16rm: case X86::CMOVNP16rr: case X86::CMOVNP32rm:
2538245431Sdim  case X86::CMOVNP32rr: case X86::CMOVNP64rm: case X86::CMOVNP64rr:
2539245431Sdim    return X86::COND_NP;
2540245431Sdim  case X86::CMOVNS16rm: case X86::CMOVNS16rr: case X86::CMOVNS32rm:
2541245431Sdim  case X86::CMOVNS32rr: case X86::CMOVNS64rm: case X86::CMOVNS64rr:
2542245431Sdim    return X86::COND_NS;
2543245431Sdim  case X86::CMOVO16rm:  case X86::CMOVO16rr:  case X86::CMOVO32rm:
2544245431Sdim  case X86::CMOVO32rr:  case X86::CMOVO64rm:  case X86::CMOVO64rr:
2545245431Sdim    return X86::COND_O;
2546245431Sdim  case X86::CMOVP16rm:  case X86::CMOVP16rr:  case X86::CMOVP32rm:
2547245431Sdim  case X86::CMOVP32rr:  case X86::CMOVP64rm:  case X86::CMOVP64rr:
2548245431Sdim    return X86::COND_P;
2549245431Sdim  case X86::CMOVS16rm:  case X86::CMOVS16rr:  case X86::CMOVS32rm:
2550245431Sdim  case X86::CMOVS32rr:  case X86::CMOVS64rm:  case X86::CMOVS64rr:
2551245431Sdim    return X86::COND_S;
2552245431Sdim  }
2553245431Sdim}
2554245431Sdim
2555193323Sedunsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
2556193323Sed  switch (CC) {
2557198090Srdivacky  default: llvm_unreachable("Illegal condition code!");
2558203954Srdivacky  case X86::COND_E:  return X86::JE_4;
2559203954Srdivacky  case X86::COND_NE: return X86::JNE_4;
2560203954Srdivacky  case X86::COND_L:  return X86::JL_4;
2561203954Srdivacky  case X86::COND_LE: return X86::JLE_4;
2562203954Srdivacky  case X86::COND_G:  return X86::JG_4;
2563203954Srdivacky  case X86::COND_GE: return X86::JGE_4;
2564203954Srdivacky  case X86::COND_B:  return X86::JB_4;
2565203954Srdivacky  case X86::COND_BE: return X86::JBE_4;
2566203954Srdivacky  case X86::COND_A:  return X86::JA_4;
2567203954Srdivacky  case X86::COND_AE: return X86::JAE_4;
2568203954Srdivacky  case X86::COND_S:  return X86::JS_4;
2569203954Srdivacky  case X86::COND_NS: return X86::JNS_4;
2570203954Srdivacky  case X86::COND_P:  return X86::JP_4;
2571203954Srdivacky  case X86::COND_NP: return X86::JNP_4;
2572203954Srdivacky  case X86::COND_O:  return X86::JO_4;
2573203954Srdivacky  case X86::COND_NO: return X86::JNO_4;
2574193323Sed  }
2575193323Sed}
2576193323Sed
2577193323Sed/// GetOppositeBranchCondition - Return the inverse of the specified condition,
2578193323Sed/// e.g. turning COND_E to COND_NE.
2579193323SedX86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
2580193323Sed  switch (CC) {
2581198090Srdivacky  default: llvm_unreachable("Illegal condition code!");
2582193323Sed  case X86::COND_E:  return X86::COND_NE;
2583193323Sed  case X86::COND_NE: return X86::COND_E;
2584193323Sed  case X86::COND_L:  return X86::COND_GE;
2585193323Sed  case X86::COND_LE: return X86::COND_G;
2586193323Sed  case X86::COND_G:  return X86::COND_LE;
2587193323Sed  case X86::COND_GE: return X86::COND_L;
2588193323Sed  case X86::COND_B:  return X86::COND_AE;
2589193323Sed  case X86::COND_BE: return X86::COND_A;
2590193323Sed  case X86::COND_A:  return X86::COND_BE;
2591193323Sed  case X86::COND_AE: return X86::COND_B;
2592193323Sed  case X86::COND_S:  return X86::COND_NS;
2593193323Sed  case X86::COND_NS: return X86::COND_S;
2594193323Sed  case X86::COND_P:  return X86::COND_NP;
2595193323Sed  case X86::COND_NP: return X86::COND_P;
2596193323Sed  case X86::COND_O:  return X86::COND_NO;
2597193323Sed  case X86::COND_NO: return X86::COND_O;
2598193323Sed  }
2599193323Sed}
2600193323Sed
2601245431Sdim/// getSwappedCondition - assume the flags are set by MI(a,b), return
2602245431Sdim/// the condition code if we modify the instructions such that flags are
2603245431Sdim/// set by MI(b,a).
2604245431Sdimstatic X86::CondCode getSwappedCondition(X86::CondCode CC) {
2605245431Sdim  switch (CC) {
2606245431Sdim  default: return X86::COND_INVALID;
2607245431Sdim  case X86::COND_E:  return X86::COND_E;
2608245431Sdim  case X86::COND_NE: return X86::COND_NE;
2609245431Sdim  case X86::COND_L:  return X86::COND_G;
2610245431Sdim  case X86::COND_LE: return X86::COND_GE;
2611245431Sdim  case X86::COND_G:  return X86::COND_L;
2612245431Sdim  case X86::COND_GE: return X86::COND_LE;
2613245431Sdim  case X86::COND_B:  return X86::COND_A;
2614245431Sdim  case X86::COND_BE: return X86::COND_AE;
2615245431Sdim  case X86::COND_A:  return X86::COND_B;
2616245431Sdim  case X86::COND_AE: return X86::COND_BE;
2617245431Sdim  }
2618245431Sdim}
2619245431Sdim
2620245431Sdim/// getSETFromCond - Return a set opcode for the given condition and
2621245431Sdim/// whether it has memory operand.
2622245431Sdimstatic unsigned getSETFromCond(X86::CondCode CC,
2623245431Sdim                               bool HasMemoryOperand) {
2624245431Sdim  static const uint16_t Opc[16][2] = {
2625245431Sdim    { X86::SETAr,  X86::SETAm  },
2626245431Sdim    { X86::SETAEr, X86::SETAEm },
2627245431Sdim    { X86::SETBr,  X86::SETBm  },
2628245431Sdim    { X86::SETBEr, X86::SETBEm },
2629245431Sdim    { X86::SETEr,  X86::SETEm  },
2630245431Sdim    { X86::SETGr,  X86::SETGm  },
2631245431Sdim    { X86::SETGEr, X86::SETGEm },
2632245431Sdim    { X86::SETLr,  X86::SETLm  },
2633245431Sdim    { X86::SETLEr, X86::SETLEm },
2634245431Sdim    { X86::SETNEr, X86::SETNEm },
2635245431Sdim    { X86::SETNOr, X86::SETNOm },
2636245431Sdim    { X86::SETNPr, X86::SETNPm },
2637245431Sdim    { X86::SETNSr, X86::SETNSm },
2638245431Sdim    { X86::SETOr,  X86::SETOm  },
2639245431Sdim    { X86::SETPr,  X86::SETPm  },
2640245431Sdim    { X86::SETSr,  X86::SETSm  }
2641245431Sdim  };
2642245431Sdim
2643245431Sdim  assert(CC < 16 && "Can only handle standard cond codes");
2644245431Sdim  return Opc[CC][HasMemoryOperand ? 1 : 0];
2645245431Sdim}
2646245431Sdim
/// getCMovFromCond - Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
                                bool HasMemoryOperand) {
  // The table is indexed directly by condition code: rows 0-15 are the
  // register-register forms, rows 16-31 the corresponding register-memory
  // forms in the same order.  Columns hold the 16-, 32-, and 64-bit variants.
  static const uint16_t Opc[32][3] = {
    { X86::CMOVA16rr,  X86::CMOVA32rr,  X86::CMOVA64rr  },
    { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
    { X86::CMOVB16rr,  X86::CMOVB32rr,  X86::CMOVB64rr  },
    { X86::CMOVBE16rr, X86::CMOVBE32rr, X86::CMOVBE64rr },
    { X86::CMOVE16rr,  X86::CMOVE32rr,  X86::CMOVE64rr  },
    { X86::CMOVG16rr,  X86::CMOVG32rr,  X86::CMOVG64rr  },
    { X86::CMOVGE16rr, X86::CMOVGE32rr, X86::CMOVGE64rr },
    { X86::CMOVL16rr,  X86::CMOVL32rr,  X86::CMOVL64rr  },
    { X86::CMOVLE16rr, X86::CMOVLE32rr, X86::CMOVLE64rr },
    { X86::CMOVNE16rr, X86::CMOVNE32rr, X86::CMOVNE64rr },
    { X86::CMOVNO16rr, X86::CMOVNO32rr, X86::CMOVNO64rr },
    { X86::CMOVNP16rr, X86::CMOVNP32rr, X86::CMOVNP64rr },
    { X86::CMOVNS16rr, X86::CMOVNS32rr, X86::CMOVNS64rr },
    { X86::CMOVO16rr,  X86::CMOVO32rr,  X86::CMOVO64rr  },
    { X86::CMOVP16rr,  X86::CMOVP32rr,  X86::CMOVP64rr  },
    { X86::CMOVS16rr,  X86::CMOVS32rr,  X86::CMOVS64rr  },
    { X86::CMOVA16rm,  X86::CMOVA32rm,  X86::CMOVA64rm  },
    { X86::CMOVAE16rm, X86::CMOVAE32rm, X86::CMOVAE64rm },
    { X86::CMOVB16rm,  X86::CMOVB32rm,  X86::CMOVB64rm  },
    { X86::CMOVBE16rm, X86::CMOVBE32rm, X86::CMOVBE64rm },
    { X86::CMOVE16rm,  X86::CMOVE32rm,  X86::CMOVE64rm  },
    { X86::CMOVG16rm,  X86::CMOVG32rm,  X86::CMOVG64rm  },
    { X86::CMOVGE16rm, X86::CMOVGE32rm, X86::CMOVGE64rm },
    { X86::CMOVL16rm,  X86::CMOVL32rm,  X86::CMOVL64rm  },
    { X86::CMOVLE16rm, X86::CMOVLE32rm, X86::CMOVLE64rm },
    { X86::CMOVNE16rm, X86::CMOVNE32rm, X86::CMOVNE64rm },
    { X86::CMOVNO16rm, X86::CMOVNO32rm, X86::CMOVNO64rm },
    { X86::CMOVNP16rm, X86::CMOVNP32rm, X86::CMOVNP64rm },
    { X86::CMOVNS16rm, X86::CMOVNS32rm, X86::CMOVNS64rm },
    { X86::CMOVO16rm,  X86::CMOVO32rm,  X86::CMOVO64rm  },
    { X86::CMOVP16rm,  X86::CMOVP32rm,  X86::CMOVP64rm  },
    { X86::CMOVS16rm,  X86::CMOVS32rm,  X86::CMOVS64rm  }
  };

  assert(CC < 16 && "Can only handle standard cond codes");
  // Memory-operand forms live in the second half of the table.
  unsigned Idx = HasMemoryOperand ? 16+CC : CC;
  switch(RegBytes) {
  default: llvm_unreachable("Illegal register size!");
  case 2: return Opc[Idx][0];
  case 4: return Opc[Idx][1];
  case 8: return Opc[Idx][2];
  }
}
2695245431Sdim
2696193323Sedbool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
2697235633Sdim  if (!MI->isTerminator()) return false;
2698218893Sdim
2699193323Sed  // Conditional branch is a special case.
2700235633Sdim  if (MI->isBranch() && !MI->isBarrier())
2701193323Sed    return true;
2702235633Sdim  if (!MI->isPredicable())
2703193323Sed    return true;
2704193323Sed  return !isPredicated(MI);
2705193323Sed}
2706193323Sed
/// AnalyzeBranch - Walk the terminators of MBB from the bottom up and
/// decompose them into an optional conditional branch (destination in TBB,
/// condition code in Cond) plus an optional unconditional branch (FBB, or
/// TBB when there is no conditional branch).  Returns true when the
/// terminator sequence cannot be understood.  When AllowModify is set this
/// may also rewrite the block in place: instructions after a JMP are erased,
/// a fall-through JMP is deleted, and a jCC/jmp pair whose jCC targets the
/// layout successor is inverted into a single branch pair.
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
  while (I != MBB.begin()) {
    --I;
    // Debug values are transparent to this analysis.
    if (I->isDebugValue())
      continue;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isUnpredicatedTerminator(I))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    if (!I->isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP_4) {
      // Remember the JMP so a later conditional branch can pair with it.
      UnCondBrIter = I;

      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        UnCondBrIter = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = getCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
      if (AllowModify && UnCondBrIter != MBB.end() &&
          MBB.isLayoutSuccessor(TargetBB)) {
        // If we can modify the code and it ends in something like:
        //
        //     jCC L1
        //     jmp L2
        //   L1:
        //     ...
        //   L2:
        //
        // Then we can change this to:
        //
        //     jnCC L2
        //   L1:
        //     ...
        //   L2:
        //
        // Which is a bit more efficient.
        // We conditionally jump to the fall-through block.
        BranchCode = GetOppositeBranchCondition(BranchCode);
        unsigned JNCC = GetCondBranchFromCond(BranchCode);
        MachineBasicBlock::iterator OldInst = I;

        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC))
          .addMBB(UnCondBrIter->getOperand(0).getMBB());
        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
          .addMBB(TargetBB);

        OldInst->eraseFromParent();
        UnCondBrIter->eraseFromParent();

        // Restart the analysis.
        UnCondBrIter = MBB.end();
        I = MBB.end();
        continue;
      }

      // The previously recorded unconditional destination becomes the
      // fall-through (false) block of this conditional branch.
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination and their condition
    // opcodes fit one of the special multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to the same
    // destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    if (OldBranchCode == BranchCode)
      continue;

    // If they differ, see if they fit one of the known patterns. Theoretically,
    // we could handle more patterns here, but we shouldn't expect to see them
    // if instruction selection has done a reasonable job.
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;

    // Update the MachineOperand.
    Cond[0].setImm(BranchCode);
  }

  return false;
}
2850193323Sed
2851193323Sedunsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
2852193323Sed  MachineBasicBlock::iterator I = MBB.end();
2853193323Sed  unsigned Count = 0;
2854193323Sed
2855193323Sed  while (I != MBB.begin()) {
2856193323Sed    --I;
2857206083Srdivacky    if (I->isDebugValue())
2858206083Srdivacky      continue;
2859203954Srdivacky    if (I->getOpcode() != X86::JMP_4 &&
2860245431Sdim        getCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
2861193323Sed      break;
2862193323Sed    // Remove the branch.
2863193323Sed    I->eraseFromParent();
2864193323Sed    I = MBB.end();
2865193323Sed    ++Count;
2866193323Sed  }
2867218893Sdim
2868193323Sed  return Count;
2869193323Sed}
2870193323Sed
2871193323Sedunsigned
2872193323SedX86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
2873193323Sed                           MachineBasicBlock *FBB,
2874210299Sed                           const SmallVectorImpl<MachineOperand> &Cond,
2875210299Sed                           DebugLoc DL) const {
2876193323Sed  // Shouldn't be a fall through.
2877193323Sed  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
2878193323Sed  assert((Cond.size() == 1 || Cond.size() == 0) &&
2879193323Sed         "X86 branch conditions have one component!");
2880193323Sed
2881193323Sed  if (Cond.empty()) {
2882193323Sed    // Unconditional branch?
2883193323Sed    assert(!FBB && "Unconditional branch with multiple successors!");
2884210299Sed    BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
2885193323Sed    return 1;
2886193323Sed  }
2887193323Sed
2888193323Sed  // Conditional branch.
2889193323Sed  unsigned Count = 0;
2890193323Sed  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
2891193323Sed  switch (CC) {
2892193323Sed  case X86::COND_NP_OR_E:
2893193323Sed    // Synthesize NP_OR_E with two branches.
2894210299Sed    BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
2895193323Sed    ++Count;
2896210299Sed    BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
2897193323Sed    ++Count;
2898193323Sed    break;
2899193323Sed  case X86::COND_NE_OR_P:
2900193323Sed    // Synthesize NE_OR_P with two branches.
2901210299Sed    BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
2902193323Sed    ++Count;
2903210299Sed    BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
2904193323Sed    ++Count;
2905193323Sed    break;
2906193323Sed  default: {
2907193323Sed    unsigned Opc = GetCondBranchFromCond(CC);
2908210299Sed    BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
2909193323Sed    ++Count;
2910193323Sed  }
2911193323Sed  }
2912193323Sed  if (FBB) {
2913193323Sed    // Two-way Conditional branch. Insert the second branch.
2914210299Sed    BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
2915193323Sed    ++Count;
2916193323Sed  }
2917193323Sed  return Count;
2918193323Sed}
2919193323Sed
2920245431Sdimbool X86InstrInfo::
2921245431SdimcanInsertSelect(const MachineBasicBlock &MBB,
2922245431Sdim                const SmallVectorImpl<MachineOperand> &Cond,
2923245431Sdim                unsigned TrueReg, unsigned FalseReg,
2924245431Sdim                int &CondCycles, int &TrueCycles, int &FalseCycles) const {
2925245431Sdim  // Not all subtargets have cmov instructions.
2926245431Sdim  if (!TM.getSubtarget<X86Subtarget>().hasCMov())
2927245431Sdim    return false;
2928245431Sdim  if (Cond.size() != 1)
2929245431Sdim    return false;
2930245431Sdim  // We cannot do the composite conditions, at least not in SSA form.
2931245431Sdim  if ((X86::CondCode)Cond[0].getImm() > X86::COND_S)
2932245431Sdim    return false;
2933245431Sdim
2934245431Sdim  // Check register classes.
2935245431Sdim  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2936245431Sdim  const TargetRegisterClass *RC =
2937245431Sdim    RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
2938245431Sdim  if (!RC)
2939245431Sdim    return false;
2940245431Sdim
2941245431Sdim  // We have cmov instructions for 16, 32, and 64 bit general purpose registers.
2942245431Sdim  if (X86::GR16RegClass.hasSubClassEq(RC) ||
2943245431Sdim      X86::GR32RegClass.hasSubClassEq(RC) ||
2944245431Sdim      X86::GR64RegClass.hasSubClassEq(RC)) {
2945245431Sdim    // This latency applies to Pentium M, Merom, Wolfdale, Nehalem, and Sandy
2946245431Sdim    // Bridge. Probably Ivy Bridge as well.
2947245431Sdim    CondCycles = 2;
2948245431Sdim    TrueCycles = 2;
2949245431Sdim    FalseCycles = 2;
2950245431Sdim    return true;
2951245431Sdim  }
2952245431Sdim
2953245431Sdim  // Can't do vectors.
2954245431Sdim  return false;
2955245431Sdim}
2956245431Sdim
2957245431Sdimvoid X86InstrInfo::insertSelect(MachineBasicBlock &MBB,
2958245431Sdim                                MachineBasicBlock::iterator I, DebugLoc DL,
2959245431Sdim                                unsigned DstReg,
2960245431Sdim                                const SmallVectorImpl<MachineOperand> &Cond,
2961245431Sdim                                unsigned TrueReg, unsigned FalseReg) const {
2962245431Sdim   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
2963245431Sdim   assert(Cond.size() == 1 && "Invalid Cond array");
2964245431Sdim   unsigned Opc = getCMovFromCond((X86::CondCode)Cond[0].getImm(),
2965245431Sdim                                  MRI.getRegClass(DstReg)->getSize(),
2966245431Sdim                                  false/*HasMemoryOperand*/);
2967245431Sdim   BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(FalseReg).addReg(TrueReg);
2968245431Sdim}
2969245431Sdim
/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
  // Membership in the GR8_ABCD_H register class identifies the h registers.
  return X86::GR8_ABCD_HRegClass.contains(Reg);
}
2974193323Sed
2975212904Sdim// Try and copy between VR128/VR64 and GR64 registers.
2976226890Sdimstatic unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
2977263509Sdim                                        const X86Subtarget& Subtarget) {
2978263509Sdim
2979263509Sdim
2980212904Sdim  // SrcReg(VR128) -> DestReg(GR64)
2981212904Sdim  // SrcReg(VR64)  -> DestReg(GR64)
2982212904Sdim  // SrcReg(GR64)  -> DestReg(VR128)
2983212904Sdim  // SrcReg(GR64)  -> DestReg(VR64)
2984212904Sdim
2985263509Sdim  bool HasAVX = Subtarget.hasAVX();
2986263509Sdim  bool HasAVX512 = Subtarget.hasAVX512();
2987212904Sdim  if (X86::GR64RegClass.contains(DestReg)) {
2988263509Sdim    if (X86::VR128XRegClass.contains(SrcReg))
2989212904Sdim      // Copy from a VR128 register to a GR64 register.
2990263509Sdim      return HasAVX512 ? X86::VMOVPQIto64Zrr: (HasAVX ? X86::VMOVPQIto64rr :
2991263509Sdim                                               X86::MOVPQIto64rr);
2992245431Sdim    if (X86::VR64RegClass.contains(SrcReg))
2993212904Sdim      // Copy from a VR64 register to a GR64 register.
2994212904Sdim      return X86::MOVSDto64rr;
2995212904Sdim  } else if (X86::GR64RegClass.contains(SrcReg)) {
2996212904Sdim    // Copy from a GR64 register to a VR128 register.
2997263509Sdim    if (X86::VR128XRegClass.contains(DestReg))
2998263509Sdim      return HasAVX512 ? X86::VMOV64toPQIZrr: (HasAVX ? X86::VMOV64toPQIrr :
2999263509Sdim                                               X86::MOV64toPQIrr);
3000212904Sdim    // Copy from a GR64 register to a VR64 register.
3001245431Sdim    if (X86::VR64RegClass.contains(DestReg))
3002212904Sdim      return X86::MOV64toSDrr;
3003212904Sdim  }
3004212904Sdim
3005226890Sdim  // SrcReg(FR32) -> DestReg(GR32)
3006226890Sdim  // SrcReg(GR32) -> DestReg(FR32)
3007226890Sdim
3008263509Sdim  if (X86::GR32RegClass.contains(DestReg) && X86::FR32XRegClass.contains(SrcReg))
3009245431Sdim    // Copy from a FR32 register to a GR32 register.
3010263509Sdim    return HasAVX512 ? X86::VMOVSS2DIZrr : (HasAVX ? X86::VMOVSS2DIrr : X86::MOVSS2DIrr);
3011226890Sdim
3012263509Sdim  if (X86::FR32XRegClass.contains(DestReg) && X86::GR32RegClass.contains(SrcReg))
3013245431Sdim    // Copy from a GR32 register to a FR32 register.
3014263509Sdim    return HasAVX512 ? X86::VMOVDI2SSZrr : (HasAVX ? X86::VMOVDI2SSrr : X86::MOVDI2SSrr);
3015263509Sdim  return 0;
3016263509Sdim}
3017226890Sdim
3018263509Sdimstatic
3019263509Sdimunsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) {
3020263509Sdim  if (X86::VR128XRegClass.contains(DestReg, SrcReg) ||
3021263509Sdim      X86::VR256XRegClass.contains(DestReg, SrcReg) ||
3022263509Sdim      X86::VR512RegClass.contains(DestReg, SrcReg)) {
3023263509Sdim     DestReg = get512BitSuperRegister(DestReg);
3024263509Sdim     SrcReg = get512BitSuperRegister(SrcReg);
3025263509Sdim     return X86::VMOVAPSZrr;
3026263509Sdim  }
3027263509Sdim  if ((X86::VK8RegClass.contains(DestReg) ||
3028263509Sdim       X86::VK16RegClass.contains(DestReg)) &&
3029263509Sdim      (X86::VK8RegClass.contains(SrcReg) ||
3030263509Sdim       X86::VK16RegClass.contains(SrcReg)))
3031263509Sdim    return X86::KMOVWkk;
3032212904Sdim  return 0;
3033212904Sdim}
3034212904Sdim
/// copyPhysReg - Emit a physical register-to-register copy from SrcReg into
/// DestReg before MI, selecting the opcode from the register classes of the
/// two registers.  EFLAGS copies are lowered to pushf/pop sequences.  If no
/// copy instruction exists for the pair, this is a fatal error.
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI, DebugLoc DL,
                               unsigned DestReg, unsigned SrcReg,
                               bool KillSrc) const {
  // First deal with the normal symmetric copies.
  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
  bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
  unsigned Opc = 0;
  if (X86::GR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV64rr;
  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV32rr;
  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
    Opc = X86::MOV16rr;
  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move.  Otherwise use a normal move.
    if ((isHReg(DestReg) || isHReg(SrcReg)) &&
        TM.getSubtarget<X86Subtarget>().is64Bit()) {
      Opc = X86::MOV8rr_NOREX;
      // Both operands must be encodable without an REX prefix.
      assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
             "8-bit H register can not be copied outside GR8_NOREX");
    } else
      Opc = X86::MOV8rr;
  }
  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
    Opc = X86::MMX_MOVQ64rr;
  else if (HasAVX512)
    // NB: the AVX-512 helper is checked before the plain VR128/VR256 classes
    // and may rewrite DestReg/SrcReg to their 512-bit super-registers.
    Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg);
  else if (X86::VR128RegClass.contains(DestReg, SrcReg))
    Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
  else if (X86::VR256RegClass.contains(DestReg, SrcReg))
    Opc = X86::VMOVAPSYrr;
  if (!Opc)
    // Fall back to the cross-class (GPR <-> vector/FP) copies.
    Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>());

  if (Opc) {
    BuildMI(MBB, MI, DL, get(Opc), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Moving EFLAGS to / from another register requires a push and a pop.
  // Notice that we have to adjust the stack if we don't want to clobber the
  // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
  if (SrcReg == X86::EFLAGS) {
    if (X86::GR64RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF64));
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
      return;
    }
    if (X86::GR32RegClass.contains(DestReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSHF32));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return;
    }
  }
  if (DestReg == X86::EFLAGS) {
    if (X86::GR64RegClass.contains(SrcReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSH64r))
        .addReg(SrcReg, getKillRegState(KillSrc));
      BuildMI(MBB, MI, DL, get(X86::POPF64));
      return;
    }
    if (X86::GR32RegClass.contains(SrcReg)) {
      BuildMI(MBB, MI, DL, get(X86::PUSH32r))
        .addReg(SrcReg, getKillRegState(KillSrc));
      BuildMI(MBB, MI, DL, get(X86::POPF32));
      return;
    }
  }

  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
               << " to " << RI.getName(DestReg) << '\n');
  llvm_unreachable("Cannot emit physreg copy instruction");
}
3112193323Sed
/// getLoadStoreRegOpcode - Return the opcode used to reload (load == true) or
/// spill (load == false) a register of class RC to/from a stack slot.  The
/// choice keys off the spill size of RC; isStackAligned selects between
/// aligned and unaligned vector moves.
static unsigned getLoadStoreRegOpcode(unsigned Reg,
                                      const TargetRegisterClass *RC,
                                      bool isStackAligned,
                                      const TargetMachine &TM,
                                      bool load) {
  // AVX-512 register classes are dispatched before the size-based switch so
  // mask and extended FP/vector classes get their Z/KMOV opcodes.
  if (TM.getSubtarget<X86Subtarget>().hasAVX512()) {
    if (X86::VK8RegClass.hasSubClassEq(RC)  ||
      X86::VK16RegClass.hasSubClassEq(RC))
      return load ? X86::KMOVWkm : X86::KMOVWmk;
    if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
      return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
    if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
      return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
    if (X86::VR512RegClass.hasSubClassEq(RC))
      return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }

  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
  switch (RC->getSize()) {
  default:
    llvm_unreachable("Unknown spill size");
  case 1:
    assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      // Copying to or from a physical H register on x86-64 requires a NOREX
      // move.  Otherwise use a normal move.
      if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
        return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
    return load ? X86::MOV8rm : X86::MOV8mr;
  case 2:
    assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
    return load ? X86::MOV16rm : X86::MOV16mr;
  case 4:
    if (X86::GR32RegClass.hasSubClassEq(RC))
      return load ? X86::MOV32rm : X86::MOV32mr;
    if (X86::FR32RegClass.hasSubClassEq(RC))
      return load ?
        (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
        (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
    if (X86::RFP32RegClass.hasSubClassEq(RC))
      return load ? X86::LD_Fp32m : X86::ST_Fp32m;
    llvm_unreachable("Unknown 4-byte regclass");
  case 8:
    if (X86::GR64RegClass.hasSubClassEq(RC))
      return load ? X86::MOV64rm : X86::MOV64mr;
    if (X86::FR64RegClass.hasSubClassEq(RC))
      return load ?
        (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
        (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
    if (X86::VR64RegClass.hasSubClassEq(RC))
      return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
    if (X86::RFP64RegClass.hasSubClassEq(RC))
      return load ? X86::LD_Fp64m : X86::ST_Fp64m;
    llvm_unreachable("Unknown 8-byte regclass");
  case 10:
    assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
    return load ? X86::LD_Fp80m : X86::ST_FpP80m;
  case 16: {
    assert((X86::VR128RegClass.hasSubClassEq(RC) ||
            X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
    // If stack is realigned we can use aligned stores.
    if (isStackAligned)
      return load ?
        (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) :
        (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
    else
      return load ?
        (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) :
        (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  }
  case 32:
    assert((X86::VR256RegClass.hasSubClassEq(RC) ||
            X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
    // If stack is realigned we can use aligned stores.
    if (isStackAligned)
      return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
    else
      return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
  case 64:
    assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
    if (isStackAligned)
      return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
}
3199193323Sed
3200210299Sedstatic unsigned getStoreRegOpcode(unsigned SrcReg,
3201210299Sed                                  const TargetRegisterClass *RC,
3202210299Sed                                  bool isStackAligned,
3203210299Sed                                  TargetMachine &TM) {
3204210299Sed  return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
3205193323Sed}
3206193323Sed
3207210299Sed
3208210299Sedstatic unsigned getLoadRegOpcode(unsigned DestReg,
3209210299Sed                                 const TargetRegisterClass *RC,
3210210299Sed                                 bool isStackAligned,
3211210299Sed                                 const TargetMachine &TM) {
3212210299Sed  return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
3213210299Sed}
3214210299Sed
3215193323Sedvoid X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
3216193323Sed                                       MachineBasicBlock::iterator MI,
3217193323Sed                                       unsigned SrcReg, bool isKill, int FrameIdx,
3218208599Srdivacky                                       const TargetRegisterClass *RC,
3219208599Srdivacky                                       const TargetRegisterInfo *TRI) const {
3220193323Sed  const MachineFunction &MF = *MBB.getParent();
3221212904Sdim  assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
3222212904Sdim         "Stack slot too small for store");
3223263509Sdim  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
3224226890Sdim  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
3225224145Sdim    RI.canRealignStack(MF);
3226193323Sed  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
3227203954Srdivacky  DebugLoc DL = MBB.findDebugLoc(MI);
3228193323Sed  addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
3229193323Sed    .addReg(SrcReg, getKillRegState(isKill));
3230193323Sed}
3231193323Sed
3232193323Sedvoid X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
3233193323Sed                                  bool isKill,
3234193323Sed                                  SmallVectorImpl<MachineOperand> &Addr,
3235193323Sed                                  const TargetRegisterClass *RC,
3236198090Srdivacky                                  MachineInstr::mmo_iterator MMOBegin,
3237198090Srdivacky                                  MachineInstr::mmo_iterator MMOEnd,
3238193323Sed                                  SmallVectorImpl<MachineInstr*> &NewMIs) const {
3239263509Sdim  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
3240226890Sdim  bool isAligned = MMOBegin != MMOEnd &&
3241226890Sdim                   (*MMOBegin)->getAlignment() >= Alignment;
3242193323Sed  unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
3243206124Srdivacky  DebugLoc DL;
3244193323Sed  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
3245193323Sed  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
3246193323Sed    MIB.addOperand(Addr[i]);
3247193323Sed  MIB.addReg(SrcReg, getKillRegState(isKill));
3248198090Srdivacky  (*MIB).setMemRefs(MMOBegin, MMOEnd);
3249193323Sed  NewMIs.push_back(MIB);
3250193323Sed}
3251193323Sed
3252193323Sed
3253193323Sedvoid X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
3254193323Sed                                        MachineBasicBlock::iterator MI,
3255193323Sed                                        unsigned DestReg, int FrameIdx,
3256208599Srdivacky                                        const TargetRegisterClass *RC,
3257208599Srdivacky                                        const TargetRegisterInfo *TRI) const {
3258193323Sed  const MachineFunction &MF = *MBB.getParent();
3259263509Sdim  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
3260226890Sdim  bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
3261224145Sdim    RI.canRealignStack(MF);
3262193323Sed  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
3263203954Srdivacky  DebugLoc DL = MBB.findDebugLoc(MI);
3264193323Sed  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
3265193323Sed}
3266193323Sed
3267193323Sedvoid X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
3268193323Sed                                 SmallVectorImpl<MachineOperand> &Addr,
3269193323Sed                                 const TargetRegisterClass *RC,
3270198090Srdivacky                                 MachineInstr::mmo_iterator MMOBegin,
3271198090Srdivacky                                 MachineInstr::mmo_iterator MMOEnd,
3272193323Sed                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
3273263509Sdim  unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
3274226890Sdim  bool isAligned = MMOBegin != MMOEnd &&
3275226890Sdim                   (*MMOBegin)->getAlignment() >= Alignment;
3276193323Sed  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
3277206124Srdivacky  DebugLoc DL;
3278193323Sed  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
3279193323Sed  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
3280193323Sed    MIB.addOperand(Addr[i]);
3281198090Srdivacky  (*MIB).setMemRefs(MMOBegin, MMOEnd);
3282193323Sed  NewMIs.push_back(MIB);
3283193323Sed}
3284193323Sed
3285245431Sdimbool X86InstrInfo::
3286245431SdimanalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
3287245431Sdim               int &CmpMask, int &CmpValue) const {
3288245431Sdim  switch (MI->getOpcode()) {
3289245431Sdim  default: break;
3290245431Sdim  case X86::CMP64ri32:
3291245431Sdim  case X86::CMP64ri8:
3292245431Sdim  case X86::CMP32ri:
3293245431Sdim  case X86::CMP32ri8:
3294245431Sdim  case X86::CMP16ri:
3295245431Sdim  case X86::CMP16ri8:
3296245431Sdim  case X86::CMP8ri:
3297245431Sdim    SrcReg = MI->getOperand(0).getReg();
3298245431Sdim    SrcReg2 = 0;
3299245431Sdim    CmpMask = ~0;
3300245431Sdim    CmpValue = MI->getOperand(1).getImm();
3301245431Sdim    return true;
3302245431Sdim  // A SUB can be used to perform comparison.
3303245431Sdim  case X86::SUB64rm:
3304245431Sdim  case X86::SUB32rm:
3305245431Sdim  case X86::SUB16rm:
3306245431Sdim  case X86::SUB8rm:
3307245431Sdim    SrcReg = MI->getOperand(1).getReg();
3308245431Sdim    SrcReg2 = 0;
3309245431Sdim    CmpMask = ~0;
3310245431Sdim    CmpValue = 0;
3311245431Sdim    return true;
3312245431Sdim  case X86::SUB64rr:
3313245431Sdim  case X86::SUB32rr:
3314245431Sdim  case X86::SUB16rr:
3315245431Sdim  case X86::SUB8rr:
3316245431Sdim    SrcReg = MI->getOperand(1).getReg();
3317245431Sdim    SrcReg2 = MI->getOperand(2).getReg();
3318245431Sdim    CmpMask = ~0;
3319245431Sdim    CmpValue = 0;
3320245431Sdim    return true;
3321245431Sdim  case X86::SUB64ri32:
3322245431Sdim  case X86::SUB64ri8:
3323245431Sdim  case X86::SUB32ri:
3324245431Sdim  case X86::SUB32ri8:
3325245431Sdim  case X86::SUB16ri:
3326245431Sdim  case X86::SUB16ri8:
3327245431Sdim  case X86::SUB8ri:
3328245431Sdim    SrcReg = MI->getOperand(1).getReg();
3329245431Sdim    SrcReg2 = 0;
3330245431Sdim    CmpMask = ~0;
3331245431Sdim    CmpValue = MI->getOperand(2).getImm();
3332245431Sdim    return true;
3333245431Sdim  case X86::CMP64rr:
3334245431Sdim  case X86::CMP32rr:
3335245431Sdim  case X86::CMP16rr:
3336245431Sdim  case X86::CMP8rr:
3337245431Sdim    SrcReg = MI->getOperand(0).getReg();
3338245431Sdim    SrcReg2 = MI->getOperand(1).getReg();
3339245431Sdim    CmpMask = ~0;
3340245431Sdim    CmpValue = 0;
3341245431Sdim    return true;
3342245431Sdim  case X86::TEST8rr:
3343245431Sdim  case X86::TEST16rr:
3344245431Sdim  case X86::TEST32rr:
3345245431Sdim  case X86::TEST64rr:
3346245431Sdim    SrcReg = MI->getOperand(0).getReg();
3347245431Sdim    if (MI->getOperand(1).getReg() != SrcReg) return false;
3348245431Sdim    // Compare against zero.
3349245431Sdim    SrcReg2 = 0;
3350245431Sdim    CmpMask = ~0;
3351245431Sdim    CmpValue = 0;
3352245431Sdim    return true;
3353245431Sdim  }
3354245431Sdim  return false;
3355245431Sdim}
3356245431Sdim
3357245431Sdim/// isRedundantFlagInstr - check whether the first instruction, whose only
3358245431Sdim/// purpose is to update flags, can be made redundant.
3359245431Sdim/// CMPrr can be made redundant by SUBrr if the operands are the same.
3360245431Sdim/// This function can be extended later on.
3361245431Sdim/// SrcReg, SrcRegs: register operands for FlagI.
3362245431Sdim/// ImmValue: immediate for FlagI if it takes an immediate.
3363245431Sdiminline static bool isRedundantFlagInstr(MachineInstr *FlagI, unsigned SrcReg,
3364245431Sdim                                        unsigned SrcReg2, int ImmValue,
3365245431Sdim                                        MachineInstr *OI) {
3366245431Sdim  if (((FlagI->getOpcode() == X86::CMP64rr &&
3367245431Sdim        OI->getOpcode() == X86::SUB64rr) ||
3368245431Sdim       (FlagI->getOpcode() == X86::CMP32rr &&
3369245431Sdim        OI->getOpcode() == X86::SUB32rr)||
3370245431Sdim       (FlagI->getOpcode() == X86::CMP16rr &&
3371245431Sdim        OI->getOpcode() == X86::SUB16rr)||
3372245431Sdim       (FlagI->getOpcode() == X86::CMP8rr &&
3373245431Sdim        OI->getOpcode() == X86::SUB8rr)) &&
3374245431Sdim      ((OI->getOperand(1).getReg() == SrcReg &&
3375245431Sdim        OI->getOperand(2).getReg() == SrcReg2) ||
3376245431Sdim       (OI->getOperand(1).getReg() == SrcReg2 &&
3377245431Sdim        OI->getOperand(2).getReg() == SrcReg)))
3378245431Sdim    return true;
3379245431Sdim
3380245431Sdim  if (((FlagI->getOpcode() == X86::CMP64ri32 &&
3381245431Sdim        OI->getOpcode() == X86::SUB64ri32) ||
3382245431Sdim       (FlagI->getOpcode() == X86::CMP64ri8 &&
3383245431Sdim        OI->getOpcode() == X86::SUB64ri8) ||
3384245431Sdim       (FlagI->getOpcode() == X86::CMP32ri &&
3385245431Sdim        OI->getOpcode() == X86::SUB32ri) ||
3386245431Sdim       (FlagI->getOpcode() == X86::CMP32ri8 &&
3387245431Sdim        OI->getOpcode() == X86::SUB32ri8) ||
3388245431Sdim       (FlagI->getOpcode() == X86::CMP16ri &&
3389245431Sdim        OI->getOpcode() == X86::SUB16ri) ||
3390245431Sdim       (FlagI->getOpcode() == X86::CMP16ri8 &&
3391245431Sdim        OI->getOpcode() == X86::SUB16ri8) ||
3392245431Sdim       (FlagI->getOpcode() == X86::CMP8ri &&
3393245431Sdim        OI->getOpcode() == X86::SUB8ri)) &&
3394245431Sdim      OI->getOperand(1).getReg() == SrcReg &&
3395245431Sdim      OI->getOperand(2).getImm() == ImmValue)
3396245431Sdim    return true;
3397245431Sdim  return false;
3398245431Sdim}
3399245431Sdim
3400245431Sdim/// isDefConvertible - check whether the definition can be converted
3401245431Sdim/// to remove a comparison against zero.
3402245431Sdiminline static bool isDefConvertible(MachineInstr *MI) {
3403245431Sdim  switch (MI->getOpcode()) {
3404245431Sdim  default: return false;
3405263509Sdim
3406263509Sdim  // The shift instructions only modify ZF if their shift count is non-zero.
3407263509Sdim  // N.B.: The processor truncates the shift count depending on the encoding.
3408263509Sdim  case X86::SAR8ri:    case X86::SAR16ri:  case X86::SAR32ri:case X86::SAR64ri:
3409263509Sdim  case X86::SHR8ri:    case X86::SHR16ri:  case X86::SHR32ri:case X86::SHR64ri:
3410263509Sdim     return getTruncatedShiftCount(MI, 2) != 0;
3411263509Sdim
3412263509Sdim  // Some left shift instructions can be turned into LEA instructions but only
3413263509Sdim  // if their flags aren't used. Avoid transforming such instructions.
3414263509Sdim  case X86::SHL8ri:    case X86::SHL16ri:  case X86::SHL32ri:case X86::SHL64ri:{
3415263509Sdim    unsigned ShAmt = getTruncatedShiftCount(MI, 2);
3416263509Sdim    if (isTruncatedShiftCountForLEA(ShAmt)) return false;
3417263509Sdim    return ShAmt != 0;
3418263509Sdim  }
3419263509Sdim
3420263509Sdim  case X86::SHRD16rri8:case X86::SHRD32rri8:case X86::SHRD64rri8:
3421263509Sdim  case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
3422263509Sdim     return getTruncatedShiftCount(MI, 3) != 0;
3423263509Sdim
3424245431Sdim  case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
3425245431Sdim  case X86::SUB32ri8:  case X86::SUB16ri:  case X86::SUB16ri8:
3426245431Sdim  case X86::SUB8ri:    case X86::SUB64rr:  case X86::SUB32rr:
3427245431Sdim  case X86::SUB16rr:   case X86::SUB8rr:   case X86::SUB64rm:
3428245431Sdim  case X86::SUB32rm:   case X86::SUB16rm:  case X86::SUB8rm:
3429252723Sdim  case X86::DEC64r:    case X86::DEC32r:   case X86::DEC16r: case X86::DEC8r:
3430245431Sdim  case X86::DEC64_32r: case X86::DEC64_16r:
3431245431Sdim  case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
3432245431Sdim  case X86::ADD32ri8:  case X86::ADD16ri:  case X86::ADD16ri8:
3433245431Sdim  case X86::ADD8ri:    case X86::ADD64rr:  case X86::ADD32rr:
3434245431Sdim  case X86::ADD16rr:   case X86::ADD8rr:   case X86::ADD64rm:
3435245431Sdim  case X86::ADD32rm:   case X86::ADD16rm:  case X86::ADD8rm:
3436252723Sdim  case X86::INC64r:    case X86::INC32r:   case X86::INC16r: case X86::INC8r:
3437245431Sdim  case X86::INC64_32r: case X86::INC64_16r:
3438245431Sdim  case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
3439245431Sdim  case X86::AND32ri8:  case X86::AND16ri:  case X86::AND16ri8:
3440245431Sdim  case X86::AND8ri:    case X86::AND64rr:  case X86::AND32rr:
3441245431Sdim  case X86::AND16rr:   case X86::AND8rr:   case X86::AND64rm:
3442245431Sdim  case X86::AND32rm:   case X86::AND16rm:  case X86::AND8rm:
3443245431Sdim  case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
3444245431Sdim  case X86::XOR32ri8:  case X86::XOR16ri:  case X86::XOR16ri8:
3445245431Sdim  case X86::XOR8ri:    case X86::XOR64rr:  case X86::XOR32rr:
3446245431Sdim  case X86::XOR16rr:   case X86::XOR8rr:   case X86::XOR64rm:
3447245431Sdim  case X86::XOR32rm:   case X86::XOR16rm:  case X86::XOR8rm:
3448245431Sdim  case X86::OR64ri32:  case X86::OR64ri8:  case X86::OR32ri:
3449245431Sdim  case X86::OR32ri8:   case X86::OR16ri:   case X86::OR16ri8:
3450245431Sdim  case X86::OR8ri:     case X86::OR64rr:   case X86::OR32rr:
3451245431Sdim  case X86::OR16rr:    case X86::OR8rr:    case X86::OR64rm:
3452245431Sdim  case X86::OR32rm:    case X86::OR16rm:   case X86::OR8rm:
3453263509Sdim  case X86::NEG8r:     case X86::NEG16r:   case X86::NEG32r: case X86::NEG64r:
3454263509Sdim  case X86::SAR8r1:    case X86::SAR16r1:  case X86::SAR32r1:case X86::SAR64r1:
3455263509Sdim  case X86::SHR8r1:    case X86::SHR16r1:  case X86::SHR32r1:case X86::SHR64r1:
3456263509Sdim  case X86::SHL8r1:    case X86::SHL16r1:  case X86::SHL32r1:case X86::SHL64r1:
3457263509Sdim  case X86::ADC32ri:   case X86::ADC32ri8:
3458263509Sdim  case X86::ADC32rr:   case X86::ADC64ri32:
3459263509Sdim  case X86::ADC64ri8:  case X86::ADC64rr:
3460263509Sdim  case X86::SBB32ri:   case X86::SBB32ri8:
3461263509Sdim  case X86::SBB32rr:   case X86::SBB64ri32:
3462263509Sdim  case X86::SBB64ri8:  case X86::SBB64rr:
3463252723Sdim  case X86::ANDN32rr:  case X86::ANDN32rm:
3464252723Sdim  case X86::ANDN64rr:  case X86::ANDN64rm:
3465263509Sdim  case X86::BEXTR32rr: case X86::BEXTR64rr:
3466263509Sdim  case X86::BEXTR32rm: case X86::BEXTR64rm:
3467263509Sdim  case X86::BLSI32rr:  case X86::BLSI32rm:
3468263509Sdim  case X86::BLSI64rr:  case X86::BLSI64rm:
3469263509Sdim  case X86::BLSMSK32rr:case X86::BLSMSK32rm:
3470263509Sdim  case X86::BLSMSK64rr:case X86::BLSMSK64rm:
3471263509Sdim  case X86::BLSR32rr:  case X86::BLSR32rm:
3472263509Sdim  case X86::BLSR64rr:  case X86::BLSR64rm:
3473263509Sdim  case X86::BZHI32rr:  case X86::BZHI32rm:
3474263509Sdim  case X86::BZHI64rr:  case X86::BZHI64rm:
3475263509Sdim  case X86::LZCNT16rr: case X86::LZCNT16rm:
3476263509Sdim  case X86::LZCNT32rr: case X86::LZCNT32rm:
3477263509Sdim  case X86::LZCNT64rr: case X86::LZCNT64rm:
3478263509Sdim  case X86::POPCNT16rr:case X86::POPCNT16rm:
3479263509Sdim  case X86::POPCNT32rr:case X86::POPCNT32rm:
3480263509Sdim  case X86::POPCNT64rr:case X86::POPCNT64rm:
3481263509Sdim  case X86::TZCNT16rr: case X86::TZCNT16rm:
3482263509Sdim  case X86::TZCNT32rr: case X86::TZCNT32rm:
3483263509Sdim  case X86::TZCNT64rr: case X86::TZCNT64rm:
3484245431Sdim    return true;
3485245431Sdim  }
3486245431Sdim}
3487245431Sdim
3488245431Sdim/// optimizeCompareInstr - Check if there exists an earlier instruction that
3489245431Sdim/// operates on the same source operands and sets flags in the same way as
3490245431Sdim/// Compare; remove Compare if possible.
3491245431Sdimbool X86InstrInfo::
3492245431SdimoptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
3493245431Sdim                     int CmpMask, int CmpValue,
3494245431Sdim                     const MachineRegisterInfo *MRI) const {
3495245431Sdim  // Check whether we can replace SUB with CMP.
3496245431Sdim  unsigned NewOpcode = 0;
3497245431Sdim  switch (CmpInstr->getOpcode()) {
3498245431Sdim  default: break;
3499245431Sdim  case X86::SUB64ri32:
3500245431Sdim  case X86::SUB64ri8:
3501245431Sdim  case X86::SUB32ri:
3502245431Sdim  case X86::SUB32ri8:
3503245431Sdim  case X86::SUB16ri:
3504245431Sdim  case X86::SUB16ri8:
3505245431Sdim  case X86::SUB8ri:
3506245431Sdim  case X86::SUB64rm:
3507245431Sdim  case X86::SUB32rm:
3508245431Sdim  case X86::SUB16rm:
3509245431Sdim  case X86::SUB8rm:
3510245431Sdim  case X86::SUB64rr:
3511245431Sdim  case X86::SUB32rr:
3512245431Sdim  case X86::SUB16rr:
3513245431Sdim  case X86::SUB8rr: {
3514245431Sdim    if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
3515245431Sdim      return false;
3516245431Sdim    // There is no use of the destination register, we can replace SUB with CMP.
3517245431Sdim    switch (CmpInstr->getOpcode()) {
3518245431Sdim    default: llvm_unreachable("Unreachable!");
3519245431Sdim    case X86::SUB64rm:   NewOpcode = X86::CMP64rm;   break;
3520245431Sdim    case X86::SUB32rm:   NewOpcode = X86::CMP32rm;   break;
3521245431Sdim    case X86::SUB16rm:   NewOpcode = X86::CMP16rm;   break;
3522245431Sdim    case X86::SUB8rm:    NewOpcode = X86::CMP8rm;    break;
3523245431Sdim    case X86::SUB64rr:   NewOpcode = X86::CMP64rr;   break;
3524245431Sdim    case X86::SUB32rr:   NewOpcode = X86::CMP32rr;   break;
3525245431Sdim    case X86::SUB16rr:   NewOpcode = X86::CMP16rr;   break;
3526245431Sdim    case X86::SUB8rr:    NewOpcode = X86::CMP8rr;    break;
3527245431Sdim    case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
3528245431Sdim    case X86::SUB64ri8:  NewOpcode = X86::CMP64ri8;  break;
3529245431Sdim    case X86::SUB32ri:   NewOpcode = X86::CMP32ri;   break;
3530245431Sdim    case X86::SUB32ri8:  NewOpcode = X86::CMP32ri8;  break;
3531245431Sdim    case X86::SUB16ri:   NewOpcode = X86::CMP16ri;   break;
3532245431Sdim    case X86::SUB16ri8:  NewOpcode = X86::CMP16ri8;  break;
3533245431Sdim    case X86::SUB8ri:    NewOpcode = X86::CMP8ri;    break;
3534245431Sdim    }
3535245431Sdim    CmpInstr->setDesc(get(NewOpcode));
3536245431Sdim    CmpInstr->RemoveOperand(0);
3537245431Sdim    // Fall through to optimize Cmp if Cmp is CMPrr or CMPri.
3538245431Sdim    if (NewOpcode == X86::CMP64rm || NewOpcode == X86::CMP32rm ||
3539245431Sdim        NewOpcode == X86::CMP16rm || NewOpcode == X86::CMP8rm)
3540245431Sdim      return false;
3541245431Sdim  }
3542245431Sdim  }
3543245431Sdim
3544245431Sdim  // Get the unique definition of SrcReg.
3545245431Sdim  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
3546245431Sdim  if (!MI) return false;
3547245431Sdim
3548245431Sdim  // CmpInstr is the first instruction of the BB.
3549245431Sdim  MachineBasicBlock::iterator I = CmpInstr, Def = MI;
3550245431Sdim
3551245431Sdim  // If we are comparing against zero, check whether we can use MI to update
3552245431Sdim  // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
3553245431Sdim  bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
3554245431Sdim  if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
3555245431Sdim      !isDefConvertible(MI)))
3556245431Sdim    return false;
3557245431Sdim
3558245431Sdim  // We are searching for an earlier instruction that can make CmpInstr
3559245431Sdim  // redundant and that instruction will be saved in Sub.
3560245431Sdim  MachineInstr *Sub = NULL;
3561245431Sdim  const TargetRegisterInfo *TRI = &getRegisterInfo();
3562245431Sdim
3563245431Sdim  // We iterate backward, starting from the instruction before CmpInstr and
3564245431Sdim  // stop when reaching the definition of a source register or done with the BB.
3565245431Sdim  // RI points to the instruction before CmpInstr.
3566245431Sdim  // If the definition is in this basic block, RE points to the definition;
3567245431Sdim  // otherwise, RE is the rend of the basic block.
3568245431Sdim  MachineBasicBlock::reverse_iterator
3569245431Sdim      RI = MachineBasicBlock::reverse_iterator(I),
3570245431Sdim      RE = CmpInstr->getParent() == MI->getParent() ?
3571245431Sdim           MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
3572245431Sdim           CmpInstr->getParent()->rend();
3573245431Sdim  MachineInstr *Movr0Inst = 0;
3574245431Sdim  for (; RI != RE; ++RI) {
3575245431Sdim    MachineInstr *Instr = &*RI;
3576245431Sdim    // Check whether CmpInstr can be made redundant by the current instruction.
3577245431Sdim    if (!IsCmpZero &&
3578245431Sdim        isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, Instr)) {
3579245431Sdim      Sub = Instr;
3580245431Sdim      break;
3581245431Sdim    }
3582245431Sdim
3583245431Sdim    if (Instr->modifiesRegister(X86::EFLAGS, TRI) ||
3584245431Sdim        Instr->readsRegister(X86::EFLAGS, TRI)) {
3585245431Sdim      // This instruction modifies or uses EFLAGS.
3586245431Sdim
3587245431Sdim      // MOV32r0 etc. are implemented with xor which clobbers condition code.
3588245431Sdim      // They are safe to move up, if the definition to EFLAGS is dead and
3589245431Sdim      // earlier instructions do not read or write EFLAGS.
3590263509Sdim      if (!Movr0Inst && Instr->getOpcode() == X86::MOV32r0 &&
3591245431Sdim          Instr->registerDefIsDead(X86::EFLAGS, TRI)) {
3592245431Sdim        Movr0Inst = Instr;
3593245431Sdim        continue;
3594245431Sdim      }
3595245431Sdim
3596245431Sdim      // We can't remove CmpInstr.
3597245431Sdim      return false;
3598245431Sdim    }
3599245431Sdim  }
3600245431Sdim
3601245431Sdim  // Return false if no candidates exist.
3602245431Sdim  if (!IsCmpZero && !Sub)
3603245431Sdim    return false;
3604245431Sdim
3605245431Sdim  bool IsSwapped = (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
3606245431Sdim                    Sub->getOperand(2).getReg() == SrcReg);
3607245431Sdim
3608245431Sdim  // Scan forward from the instruction after CmpInstr for uses of EFLAGS.
3609245431Sdim  // It is safe to remove CmpInstr if EFLAGS is redefined or killed.
3610245431Sdim  // If we are done with the basic block, we need to check whether EFLAGS is
3611245431Sdim  // live-out.
3612245431Sdim  bool IsSafe = false;
3613245431Sdim  SmallVector<std::pair<MachineInstr*, unsigned /*NewOpc*/>, 4> OpsToUpdate;
3614245431Sdim  MachineBasicBlock::iterator E = CmpInstr->getParent()->end();
3615245431Sdim  for (++I; I != E; ++I) {
3616245431Sdim    const MachineInstr &Instr = *I;
3617245431Sdim    bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
3618245431Sdim    bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
3619245431Sdim    // We should check the usage if this instruction uses and updates EFLAGS.
3620245431Sdim    if (!UseEFLAGS && ModifyEFLAGS) {
3621245431Sdim      // It is safe to remove CmpInstr if EFLAGS is updated again.
3622245431Sdim      IsSafe = true;
3623245431Sdim      break;
3624245431Sdim    }
3625245431Sdim    if (!UseEFLAGS && !ModifyEFLAGS)
3626245431Sdim      continue;
3627245431Sdim
3628245431Sdim    // EFLAGS is used by this instruction.
3629245431Sdim    X86::CondCode OldCC;
3630245431Sdim    bool OpcIsSET = false;
3631245431Sdim    if (IsCmpZero || IsSwapped) {
3632245431Sdim      // We decode the condition code from opcode.
3633245431Sdim      if (Instr.isBranch())
3634245431Sdim        OldCC = getCondFromBranchOpc(Instr.getOpcode());
3635245431Sdim      else {
3636245431Sdim        OldCC = getCondFromSETOpc(Instr.getOpcode());
3637245431Sdim        if (OldCC != X86::COND_INVALID)
3638245431Sdim          OpcIsSET = true;
3639245431Sdim        else
3640245431Sdim          OldCC = X86::getCondFromCMovOpc(Instr.getOpcode());
3641245431Sdim      }
3642245431Sdim      if (OldCC == X86::COND_INVALID) return false;
3643245431Sdim    }
3644245431Sdim    if (IsCmpZero) {
3645245431Sdim      switch (OldCC) {
3646245431Sdim      default: break;
3647245431Sdim      case X86::COND_A: case X86::COND_AE:
3648245431Sdim      case X86::COND_B: case X86::COND_BE:
3649245431Sdim      case X86::COND_G: case X86::COND_GE:
3650245431Sdim      case X86::COND_L: case X86::COND_LE:
3651245431Sdim      case X86::COND_O: case X86::COND_NO:
3652245431Sdim        // CF and OF are used, we can't perform this optimization.
3653245431Sdim        return false;
3654245431Sdim      }
3655245431Sdim    } else if (IsSwapped) {
3656245431Sdim      // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
3657245431Sdim      // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
3658245431Sdim      // We swap the condition code and synthesize the new opcode.
3659245431Sdim      X86::CondCode NewCC = getSwappedCondition(OldCC);
3660245431Sdim      if (NewCC == X86::COND_INVALID) return false;
3661245431Sdim
3662245431Sdim      // Synthesize the new opcode.
3663245431Sdim      bool HasMemoryOperand = Instr.hasOneMemOperand();
3664245431Sdim      unsigned NewOpc;
3665245431Sdim      if (Instr.isBranch())
3666245431Sdim        NewOpc = GetCondBranchFromCond(NewCC);
3667245431Sdim      else if(OpcIsSET)
3668245431Sdim        NewOpc = getSETFromCond(NewCC, HasMemoryOperand);
3669245431Sdim      else {
3670245431Sdim        unsigned DstReg = Instr.getOperand(0).getReg();
3671245431Sdim        NewOpc = getCMovFromCond(NewCC, MRI->getRegClass(DstReg)->getSize(),
3672245431Sdim                                 HasMemoryOperand);
3673245431Sdim      }
3674245431Sdim
3675245431Sdim      // Push the MachineInstr to OpsToUpdate.
3676245431Sdim      // If it is safe to remove CmpInstr, the condition code of these
3677245431Sdim      // instructions will be modified.
3678245431Sdim      OpsToUpdate.push_back(std::make_pair(&*I, NewOpc));
3679245431Sdim    }
3680245431Sdim    if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
3681245431Sdim      // It is safe to remove CmpInstr if EFLAGS is updated again or killed.
3682245431Sdim      IsSafe = true;
3683245431Sdim      break;
3684245431Sdim    }
3685245431Sdim  }
3686245431Sdim
3687245431Sdim  // If EFLAGS is not killed nor re-defined, we should check whether it is
3688245431Sdim  // live-out. If it is live-out, do not optimize.
3689245431Sdim  if ((IsCmpZero || IsSwapped) && !IsSafe) {
3690245431Sdim    MachineBasicBlock *MBB = CmpInstr->getParent();
3691245431Sdim    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
3692245431Sdim             SE = MBB->succ_end(); SI != SE; ++SI)
3693245431Sdim      if ((*SI)->isLiveIn(X86::EFLAGS))
3694245431Sdim        return false;
3695245431Sdim  }
3696245431Sdim
3697245431Sdim  // The instruction to be updated is either Sub or MI.
3698245431Sdim  Sub = IsCmpZero ? MI : Sub;
3699263509Sdim  // Move Movr0Inst to the appropriate place before Sub.
3700245431Sdim  if (Movr0Inst) {
3701263509Sdim    // Look backwards until we find a def that doesn't use the current EFLAGS.
3702263509Sdim    Def = Sub;
3703263509Sdim    MachineBasicBlock::reverse_iterator
3704263509Sdim      InsertI = MachineBasicBlock::reverse_iterator(++Def),
3705263509Sdim                InsertE = Sub->getParent()->rend();
3706263509Sdim    for (; InsertI != InsertE; ++InsertI) {
3707263509Sdim      MachineInstr *Instr = &*InsertI;
3708263509Sdim      if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
3709263509Sdim          Instr->modifiesRegister(X86::EFLAGS, TRI)) {
3710263509Sdim        Sub->getParent()->remove(Movr0Inst);
3711263509Sdim        Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
3712263509Sdim                                   Movr0Inst);
3713263509Sdim        break;
3714263509Sdim      }
3715263509Sdim    }
3716263509Sdim    if (InsertI == InsertE)
3717263509Sdim      return false;
3718245431Sdim  }
3719245431Sdim
3720245431Sdim  // Make sure Sub instruction defines EFLAGS and mark the def live.
3721263509Sdim  unsigned i = 0, e = Sub->getNumOperands();
3722263509Sdim  for (; i != e; ++i) {
3723263509Sdim    MachineOperand &MO = Sub->getOperand(i);
3724263509Sdim    if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
3725263509Sdim      MO.setIsDead(false);
3726263509Sdim      break;
3727263509Sdim    }
3728263509Sdim  }
3729263509Sdim  assert(i != e && "Unable to locate a def EFLAGS operand");
3730263509Sdim
3731245431Sdim  CmpInstr->eraseFromParent();
3732245431Sdim
3733245431Sdim  // Modify the condition code of instructions in OpsToUpdate.
3734245431Sdim  for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++)
3735245431Sdim    OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second));
3736245431Sdim  return true;
3737245431Sdim}
3738245431Sdim
3739245431Sdim/// optimizeLoadInstr - Try to remove the load by folding it to a register
3740245431Sdim/// operand at the use. We fold the load instructions if load defines a virtual
3741245431Sdim/// register, the virtual register is used once in the same BB, and the
3742245431Sdim/// instructions in-between do not load or store, and have no side effects.
3743245431SdimMachineInstr* X86InstrInfo::
3744245431SdimoptimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
3745245431Sdim                  unsigned &FoldAsLoadDefReg,
3746245431Sdim                  MachineInstr *&DefMI) const {
3747245431Sdim  if (FoldAsLoadDefReg == 0)
3748245431Sdim    return 0;
3749245431Sdim  // To be conservative, if there exists another load, clear the load candidate.
3750245431Sdim  if (MI->mayLoad()) {
3751245431Sdim    FoldAsLoadDefReg = 0;
3752245431Sdim    return 0;
3753245431Sdim  }
3754245431Sdim
3755245431Sdim  // Check whether we can move DefMI here.
3756245431Sdim  DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
3757245431Sdim  assert(DefMI);
3758245431Sdim  bool SawStore = false;
3759245431Sdim  if (!DefMI->isSafeToMove(this, 0, SawStore))
3760245431Sdim    return 0;
3761245431Sdim
3762245431Sdim  // We try to commute MI if possible.
3763245431Sdim  unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
3764245431Sdim  for (unsigned Idx = 0; Idx < IdxEnd; Idx++) {
3765245431Sdim    // Collect information about virtual register operands of MI.
3766245431Sdim    unsigned SrcOperandId = 0;
3767245431Sdim    bool FoundSrcOperand = false;
3768245431Sdim    for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
3769245431Sdim      MachineOperand &MO = MI->getOperand(i);
3770245431Sdim      if (!MO.isReg())
3771245431Sdim        continue;
3772245431Sdim      unsigned Reg = MO.getReg();
3773245431Sdim      if (Reg != FoldAsLoadDefReg)
3774245431Sdim        continue;
3775245431Sdim      // Do not fold if we have a subreg use or a def or multiple uses.
3776245431Sdim      if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
3777245431Sdim        return 0;
3778245431Sdim
3779245431Sdim      SrcOperandId = i;
3780245431Sdim      FoundSrcOperand = true;
3781245431Sdim    }
3782245431Sdim    if (!FoundSrcOperand) return 0;
3783245431Sdim
3784245431Sdim    // Check whether we can fold the def into SrcOperandId.
3785245431Sdim    SmallVector<unsigned, 8> Ops;
3786245431Sdim    Ops.push_back(SrcOperandId);
3787245431Sdim    MachineInstr *FoldMI = foldMemoryOperand(MI, Ops, DefMI);
3788245431Sdim    if (FoldMI) {
3789245431Sdim      FoldAsLoadDefReg = 0;
3790245431Sdim      return FoldMI;
3791245431Sdim    }
3792245431Sdim
3793245431Sdim    if (Idx == 1) {
3794245431Sdim      // MI was changed but it didn't help, commute it back!
3795245431Sdim      commuteInstruction(MI, false);
3796245431Sdim      return 0;
3797245431Sdim    }
3798245431Sdim
3799245431Sdim    // Check whether we can commute MI and enable folding.
3800245431Sdim    if (MI->isCommutable()) {
3801245431Sdim      MachineInstr *NewMI = commuteInstruction(MI, false);
3802245431Sdim      // Unable to commute.
3803245431Sdim      if (!NewMI) return 0;
3804245431Sdim      if (NewMI != MI) {
3805245431Sdim        // New instruction. It doesn't need to be kept.
3806245431Sdim        NewMI->eraseFromParent();
3807245431Sdim        return 0;
3808245431Sdim      }
3809245431Sdim    }
3810245431Sdim  }
3811245431Sdim  return 0;
3812245431Sdim}
3813245431Sdim
3814226890Sdim/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
3815226890Sdim/// instruction with two undef reads of the register being defined.  This is
3816226890Sdim/// used for mapping:
3817226890Sdim///   %xmm4 = V_SET0
3818226890Sdim/// to:
3819226890Sdim///   %xmm4 = PXORrr %xmm4<undef>, %xmm4<undef>
3820226890Sdim///
3821252723Sdimstatic bool Expand2AddrUndef(MachineInstrBuilder &MIB,
3822252723Sdim                             const MCInstrDesc &Desc) {
3823226890Sdim  assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction.");
3824252723Sdim  unsigned Reg = MIB->getOperand(0).getReg();
3825252723Sdim  MIB->setDesc(Desc);
3826226890Sdim
3827226890Sdim  // MachineInstr::addOperand() will insert explicit operands before any
3828226890Sdim  // implicit operands.
3829252723Sdim  MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
3830226890Sdim  // But we don't trust that.
3831252723Sdim  assert(MIB->getOperand(1).getReg() == Reg &&
3832252723Sdim         MIB->getOperand(2).getReg() == Reg && "Misplaced operand");
3833226890Sdim  return true;
3834226890Sdim}
3835226890Sdim
/// expandPostRAPseudo - Lower post-register-allocation pseudo instructions
/// in place to real X86 instructions.  Returns true if MI was rewritten,
/// false if the opcode is not a pseudo handled here.
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
  MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
  switch (MI->getOpcode()) {
  // SETB_C*r pseudos become SBB of the defined register with itself.
  case X86::SETB_C8r:
    return Expand2AddrUndef(MIB, get(X86::SBB8rr));
  case X86::SETB_C16r:
    return Expand2AddrUndef(MIB, get(X86::SBB16rr));
  case X86::SETB_C32r:
    return Expand2AddrUndef(MIB, get(X86::SBB32rr));
  case X86::SETB_C64r:
    return Expand2AddrUndef(MIB, get(X86::SBB64rr));
  // Zero idioms: XOR the register with itself (VEX form when AVX is on).
  case X86::V_SET0:
  case X86::FsFLD0SS:
  case X86::FsFLD0SD:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
  case X86::AVX_SET0:
    assert(HasAVX && "AVX not supported");
    return Expand2AddrUndef(MIB, get(X86::VXORPSYrr));
  case X86::AVX512_512_SET0:
    return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
  // All-ones idioms: compare the register with itself for equality.
  case X86::V_SETALLONES:
    return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
  case X86::AVX2_SETALLONES:
    return Expand2AddrUndef(MIB, get(X86::VPCMPEQDYrr));
  case X86::TEST8ri_NOREX:
    // Only the descriptor changes; operands are reused as-is.
    MI->setDesc(get(X86::TEST8ri));
    return true;
  // AVX-512 mask-register zero/all-ones idioms.
  case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
  case X86::KSET1B:
  case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
  }
  return false;
}
3870226890Sdim
3871193323Sedstatic MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
3872193323Sed                                     const SmallVectorImpl<MachineOperand> &MOs,
3873193323Sed                                     MachineInstr *MI,
3874193323Sed                                     const TargetInstrInfo &TII) {
3875193323Sed  // Create the base instruction with the memory operand as the first part.
3876252723Sdim  // Omit the implicit operands, something BuildMI can't do.
3877193323Sed  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
3878193323Sed                                              MI->getDebugLoc(), true);
3879252723Sdim  MachineInstrBuilder MIB(MF, NewMI);
3880193323Sed  unsigned NumAddrOps = MOs.size();
3881193323Sed  for (unsigned i = 0; i != NumAddrOps; ++i)
3882193323Sed    MIB.addOperand(MOs[i]);
3883193323Sed  if (NumAddrOps < 4)  // FrameIndex only
3884193323Sed    addOffset(MIB, 0);
3885218893Sdim
3886193323Sed  // Loop over the rest of the ri operands, converting them over.
3887193323Sed  unsigned NumOps = MI->getDesc().getNumOperands()-2;
3888193323Sed  for (unsigned i = 0; i != NumOps; ++i) {
3889193323Sed    MachineOperand &MO = MI->getOperand(i+2);
3890193323Sed    MIB.addOperand(MO);
3891193323Sed  }
3892193323Sed  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
3893193323Sed    MachineOperand &MO = MI->getOperand(i);
3894193323Sed    MIB.addOperand(MO);
3895193323Sed  }
3896193323Sed  return MIB;
3897193323Sed}
3898193323Sed
3899193323Sedstatic MachineInstr *FuseInst(MachineFunction &MF,
3900193323Sed                              unsigned Opcode, unsigned OpNo,
3901193323Sed                              const SmallVectorImpl<MachineOperand> &MOs,
3902193323Sed                              MachineInstr *MI, const TargetInstrInfo &TII) {
3903252723Sdim  // Omit the implicit operands, something BuildMI can't do.
3904193323Sed  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
3905193323Sed                                              MI->getDebugLoc(), true);
3906252723Sdim  MachineInstrBuilder MIB(MF, NewMI);
3907218893Sdim
3908193323Sed  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
3909193323Sed    MachineOperand &MO = MI->getOperand(i);
3910193323Sed    if (i == OpNo) {
3911193323Sed      assert(MO.isReg() && "Expected to fold into reg operand!");
3912193323Sed      unsigned NumAddrOps = MOs.size();
3913193323Sed      for (unsigned i = 0; i != NumAddrOps; ++i)
3914193323Sed        MIB.addOperand(MOs[i]);
3915193323Sed      if (NumAddrOps < 4)  // FrameIndex only
3916193323Sed        addOffset(MIB, 0);
3917193323Sed    } else {
3918193323Sed      MIB.addOperand(MO);
3919193323Sed    }
3920193323Sed  }
3921193323Sed  return MIB;
3922193323Sed}
3923193323Sed
3924193323Sedstatic MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
3925193323Sed                                const SmallVectorImpl<MachineOperand> &MOs,
3926193323Sed                                MachineInstr *MI) {
3927193323Sed  MachineFunction &MF = *MI->getParent()->getParent();
3928193323Sed  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));
3929193323Sed
3930193323Sed  unsigned NumAddrOps = MOs.size();
3931193323Sed  for (unsigned i = 0; i != NumAddrOps; ++i)
3932193323Sed    MIB.addOperand(MOs[i]);
3933193323Sed  if (NumAddrOps < 4)  // FrameIndex only
3934193323Sed    addOffset(MIB, 0);
3935193323Sed  return MIB.addImm(0);
3936193323Sed}
3937193323Sed
/// foldMemoryOperandImpl - Try to fold the memory reference described by MOs
/// into operand 'i' of MI using the RegOp2MemOpTable* folding tables.
/// Size/Align describe the memory object (Size == 0 means unknown).  Returns
/// the newly created folded instruction, or NULL when folding is not
/// possible or not profitable.
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                    MachineInstr *MI, unsigned i,
                                    const SmallVectorImpl<MachineOperand> &MOs,
                                    unsigned Size, unsigned Align) const {
  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
  bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
  bool isTwoAddrFold = false;

  // Atom favors register form of call. So, we do not fold loads into calls
  // when X86Subtarget is Atom.
  if (isCallRegIndirect &&
    (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
    return NULL;
  }

  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;

  // FIXME: AsmPrinter doesn't know how to handle
  // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
  if (MI->getOpcode() == X86::ADD32ri &&
      MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
    return NULL;

  MachineInstr *NewMI = NULL;
  // Folding a memory location into the two-address part of a two-address
  // instruction is different than folding it other places.  It requires
  // replacing the *two* registers with the memory location.
  if (isTwoAddr && NumOps >= 2 && i < 2 &&
      MI->getOperand(0).isReg() &&
      MI->getOperand(1).isReg() &&
      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) {
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    isTwoAddrFold = true;
  } else if (i == 0) { // If operand 0
    // MOV32r0 is special-cased: it becomes a store of immediate zero
    // rather than going through the fold tables.
    if (MI->getOpcode() == X86::MOV32r0) {
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
      if (NewMI)
        return NewMI;
    }

    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (i == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (i == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  } else if (i == 3) {
    OpcodeTablePtr = &RegOp2MemOpTable3;
  }

  // If table selected...
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find(MI->getOpcode());
    if (I != OpcodeTablePtr->end()) {
      unsigned Opcode = I->second.first;
      // The table entry encodes a minimum alignment requirement for the
      // memory form of the instruction.
      unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
      if (Align < MinAlign)
        return NULL;
      bool NarrowToMOV32rm = false;
      if (Size) {
        unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
        if (Size < RCSize) {
          // Check if it's safe to fold the load. If the size of the object is
          // narrower than the load width, then it's not.
          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
            return NULL;
          // If this is a 64-bit load, but the spill slot is 32, then we can do
          // a 32-bit load which is implicitly zero-extended. This likely is due
          // to liveintervalanalysis remat'ing a load from stack slot.
          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
            return NULL;
          Opcode = X86::MOV32rm;
          NarrowToMOV32rm = true;
        }
      }

      if (isTwoAddrFold)
        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
      else
        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);

      if (NarrowToMOV32rm) {
        // If this is the special case where we use a MOV32rm to load a 32-bit
        // value and zero-extend the top bits. Change the destination register
        // to a 32-bit one.
        unsigned DstReg = NewMI->getOperand(0).getReg();
        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
                                                   X86::sub_32bit));
        else
          NewMI->getOperand(0).setSubReg(X86::sub_32bit);
      }
      return NewMI;
    }
  }

  // No fusion
  if (PrintFailedFusing && !MI->isCopy())
    dbgs() << "We failed to fuse operand " << i << " in " << *MI;
  return NULL;
}
4043193323Sed
4044226890Sdim/// hasPartialRegUpdate - Return true for all instructions that only update
4045226890Sdim/// the first 32 or 64-bits of the destination register and leave the rest
4046226890Sdim/// unmodified. This can be used to avoid folding loads if the instructions
4047226890Sdim/// only update part of the destination register, and the non-updated part is
4048226890Sdim/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
4049226890Sdim/// instructions breaks the partial register dependency and it can improve
4050226890Sdim/// performance. e.g.:
4051226890Sdim///
4052226890Sdim///   movss (%rdi), %xmm0
4053226890Sdim///   cvtss2sd %xmm0, %xmm0
4054226890Sdim///
4055226890Sdim/// Instead of
4056226890Sdim///   cvtss2sd (%rdi), %xmm0
4057226890Sdim///
4058226890Sdim/// FIXME: This should be turned into a TSFlags.
4059226890Sdim///
4060226890Sdimstatic bool hasPartialRegUpdate(unsigned Opcode) {
4061226890Sdim  switch (Opcode) {
4062235633Sdim  case X86::CVTSI2SSrr:
4063235633Sdim  case X86::CVTSI2SS64rr:
4064235633Sdim  case X86::CVTSI2SDrr:
4065235633Sdim  case X86::CVTSI2SD64rr:
4066226890Sdim  case X86::CVTSD2SSrr:
4067226890Sdim  case X86::Int_CVTSD2SSrr:
4068226890Sdim  case X86::CVTSS2SDrr:
4069226890Sdim  case X86::Int_CVTSS2SDrr:
4070226890Sdim  case X86::RCPSSr:
4071226890Sdim  case X86::RCPSSr_Int:
4072226890Sdim  case X86::ROUNDSDr:
4073235633Sdim  case X86::ROUNDSDr_Int:
4074226890Sdim  case X86::ROUNDSSr:
4075235633Sdim  case X86::ROUNDSSr_Int:
4076226890Sdim  case X86::RSQRTSSr:
4077226890Sdim  case X86::RSQRTSSr_Int:
4078226890Sdim  case X86::SQRTSSr:
4079226890Sdim  case X86::SQRTSSr_Int:
4080226890Sdim    return true;
4081226890Sdim  }
4082193323Sed
4083226890Sdim  return false;
4084226890Sdim}
4085226890Sdim
4086235633Sdim/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
4087235633Sdim/// instructions we would like before a partial register update.
4088235633Sdimunsigned X86InstrInfo::
4089235633SdimgetPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
4090235633Sdim                             const TargetRegisterInfo *TRI) const {
4091235633Sdim  if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
4092235633Sdim    return 0;
4093235633Sdim
4094235633Sdim  // If MI is marked as reading Reg, the partial register update is wanted.
4095235633Sdim  const MachineOperand &MO = MI->getOperand(0);
4096235633Sdim  unsigned Reg = MO.getReg();
4097235633Sdim  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
4098235633Sdim    if (MO.readsReg() || MI->readsVirtualRegister(Reg))
4099235633Sdim      return 0;
4100235633Sdim  } else {
4101235633Sdim    if (MI->readsRegister(Reg, TRI))
4102235633Sdim      return 0;
4103235633Sdim  }
4104235633Sdim
4105235633Sdim  // If any of the preceding 16 instructions are reading Reg, insert a
4106235633Sdim  // dependency breaking instruction.  The magic number is based on a few
4107235633Sdim  // Nehalem experiments.
4108235633Sdim  return 16;
4109235633Sdim}
4110235633Sdim
4111263509Sdim// Return true for any instruction the copies the high bits of the first source
4112263509Sdim// operand into the unused high bits of the destination operand.
4113263509Sdimstatic bool hasUndefRegUpdate(unsigned Opcode) {
4114263509Sdim  switch (Opcode) {
4115263509Sdim  case X86::VCVTSI2SSrr:
4116263509Sdim  case X86::Int_VCVTSI2SSrr:
4117263509Sdim  case X86::VCVTSI2SS64rr:
4118263509Sdim  case X86::Int_VCVTSI2SS64rr:
4119263509Sdim  case X86::VCVTSI2SDrr:
4120263509Sdim  case X86::Int_VCVTSI2SDrr:
4121263509Sdim  case X86::VCVTSI2SD64rr:
4122263509Sdim  case X86::Int_VCVTSI2SD64rr:
4123263509Sdim  case X86::VCVTSD2SSrr:
4124263509Sdim  case X86::Int_VCVTSD2SSrr:
4125263509Sdim  case X86::VCVTSS2SDrr:
4126263509Sdim  case X86::Int_VCVTSS2SDrr:
4127263509Sdim  case X86::VRCPSSr:
4128263509Sdim  case X86::VROUNDSDr:
4129263509Sdim  case X86::VROUNDSDr_Int:
4130263509Sdim  case X86::VROUNDSSr:
4131263509Sdim  case X86::VROUNDSSr_Int:
4132263509Sdim  case X86::VRSQRTSSr:
4133263509Sdim  case X86::VSQRTSSr:
4134263509Sdim
4135263509Sdim  // AVX-512
4136263509Sdim  case X86::VCVTSD2SSZrr:
4137263509Sdim  case X86::VCVTSS2SDZrr:
4138263509Sdim    return true;
4139263509Sdim  }
4140263509Sdim
4141263509Sdim  return false;
4142263509Sdim}
4143263509Sdim
4144263509Sdim/// Inform the ExeDepsFix pass how many idle instructions we would like before
4145263509Sdim/// certain undef register reads.
4146263509Sdim///
4147263509Sdim/// This catches the VCVTSI2SD family of instructions:
4148263509Sdim///
4149263509Sdim/// vcvtsi2sdq %rax, %xmm0<undef>, %xmm14
4150263509Sdim///
4151263509Sdim/// We should to be careful *not* to catch VXOR idioms which are presumably
4152263509Sdim/// handled specially in the pipeline:
4153263509Sdim///
4154263509Sdim/// vxorps %xmm1<undef>, %xmm1<undef>, %xmm1
4155263509Sdim///
4156263509Sdim/// Like getPartialRegUpdateClearance, this makes a strong assumption that the
4157263509Sdim/// high bits that are passed-through are not live.
4158263509Sdimunsigned X86InstrInfo::
4159263509SdimgetUndefRegClearance(const MachineInstr *MI, unsigned &OpNum,
4160263509Sdim                     const TargetRegisterInfo *TRI) const {
4161263509Sdim  if (!hasUndefRegUpdate(MI->getOpcode()))
4162263509Sdim    return 0;
4163263509Sdim
4164263509Sdim  // Set the OpNum parameter to the first source operand.
4165263509Sdim  OpNum = 1;
4166263509Sdim
4167263509Sdim  const MachineOperand &MO = MI->getOperand(OpNum);
4168263509Sdim  if (MO.isUndef() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
4169263509Sdim    // Use the same magic number as getPartialRegUpdateClearance.
4170263509Sdim    return 16;
4171263509Sdim  }
4172263509Sdim  return 0;
4173263509Sdim}
4174263509Sdim
/// breakPartialRegDependency - Insert a dependency-breaking instruction
/// before MI so that operand OpNum's register no longer carries a false
/// dependency on its previous value.  Only XMM/YMM registers are handled;
/// other register classes are left untouched.
void X86InstrInfo::
breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
                          const TargetRegisterInfo *TRI) const {
  unsigned Reg = MI->getOperand(OpNum).getReg();
  // If MI kills this register, the false dependence is already broken.
  if (MI->killsRegister(Reg, TRI))
    return;
  if (X86::VR128RegClass.contains(Reg)) {
    // These instructions are all floating point domain, so xorps is the best
    // choice.
    bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
    unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
      .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  } else if (X86::VR256RegClass.contains(Reg)) {
    // Use vxorps to clear the full ymm register.
    // It wants to read and write the xmm sub-register.
    unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
    BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
      .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
      .addReg(Reg, RegState::ImplicitDefine);
  } else
    return;
  // Mark Reg as killed on MI so later passes see the dependency as broken.
  MI->addRegisterKilled(Reg, TRI, true);
}
4200235633Sdim
/// foldPatchpoint - Fold the register operands of a STACKMAP or PATCHPOINT
/// listed in Ops into indirect memory references to the stack slot
/// FrameIndex.  Returns the new instruction, or 0 if any requested operand
/// is not foldable (i.e. not part of the stackmap's live values).
static MachineInstr* foldPatchpoint(MachineFunction &MF,
                                    MachineInstr *MI,
                                    const SmallVectorImpl<unsigned> &Ops,
                                    int FrameIndex,
                                    const TargetInstrInfo &TII) {
  // StartIdx is the index of the first operand that may be folded; the
  // leading operands are fixed meta data.
  unsigned StartIdx = 0;
  switch (MI->getOpcode()) {
  case TargetOpcode::STACKMAP:
    StartIdx = 2; // Skip ID, nShadowBytes.
    break;
  case TargetOpcode::PATCHPOINT: {
    // For PatchPoint, the call args are not foldable.
    PatchPointOpers opers(MI);
    StartIdx = opers.getVarIdx();
    break;
  }
  default:
    llvm_unreachable("unexpected stackmap opcode");
  }

  // Return false if any operands requested for folding are not foldable (not
  // part of the stackmap's live values).
  for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
       I != E; ++I) {
    if (*I < StartIdx)
      return 0;
  }

  MachineInstr *NewMI =
    MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(MF, NewMI);

  // No need to fold return, the meta data, and function arguments
  for (unsigned i = 0; i < StartIdx; ++i)
    MIB.addOperand(MI->getOperand(i));

  for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
      assert(MO.getReg() && "patchpoint can only fold a vreg operand");
      // Compute the spill slot size and offset.
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
      unsigned SpillSize;
      unsigned SpillOffset;
      bool Valid = TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize,
                                         SpillOffset, &MF.getTarget());
      if (!Valid)
        report_fatal_error("cannot spill patchpoint subregister operand");

      // Emit the operand as an indirect memory reference:
      //   <IndirectMemRefOp marker>, <size>, <frame index>, <offset>.
      MIB.addOperand(MachineOperand::CreateImm(StackMaps::IndirectMemRefOp));
      MIB.addOperand(MachineOperand::CreateImm(SpillSize));
      MIB.addOperand(MachineOperand::CreateFI(FrameIndex));
      addOffset(MIB, SpillOffset);
    }
    else
      MIB.addOperand(MO);
  }
  return NewMI;
}
4260263509Sdim
/// foldMemoryOperandImpl - Try to fold the operands of MI listed in Ops
/// into references to the stack slot FrameIndex.  Returns the new folded
/// instruction, or NULL when folding is not possible.
MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                                    const SmallVectorImpl<unsigned> &Ops,
                                    int FrameIndex) const {
  // Special case stack map and patch point intrinsics.
  if (MI->getOpcode() == TargetOpcode::STACKMAP
      || MI->getOpcode() == TargetOpcode::PATCHPOINT) {
    return foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
  }
  // Check switch flag
  if (NoFusing) return NULL;

  // Unless optimizing for size, don't fold to avoid partial
  // register update stalls
  if (!MF.getFunction()->getAttributes().
        hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
      hasPartialRegUpdate(MI->getOpcode()))
    return 0;

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Size = MFI->getObjectSize(FrameIndex);
  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
  // If the function stack isn't realigned we don't want to fold instructions
  // that need increased alignment.
  if (!RI.needsStackRealignment(MF))
    Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment());
  // Folding both operands of a TESTrr: rewrite MI to a CMPri against 0
  // first, then fold the remaining register operand below.
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; RCSize = 1; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return NULL;
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  SmallVector<MachineOperand,4> MOs;
  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment);
}
4311193323Sed
/// foldMemoryOperandImpl - Try to fold the load performed by LoadMI into
/// the operands of MI listed in Ops.  Zero/all-ones idiom pseudos are
/// folded via a new constant-pool entry; other loads are folded by copying
/// their address operands.  Returns the new folded instruction, or NULL
/// when folding is not possible.
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                           const SmallVectorImpl<unsigned> &Ops,
                                                  MachineInstr *LoadMI) const {
  // If loading from a FrameIndex, fold directly from the FrameIndex.
  unsigned NumOps = LoadMI->getDesc().getNumOperands();
  int FrameIndex;
  if (isLoadFromStackSlot(LoadMI, FrameIndex))
    return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);

  // Check switch flag
  if (NoFusing) return NULL;

  // Unless optimizing for size, don't fold to avoid partial
  // register update stalls
  if (!MF.getFunction()->getAttributes().
        hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
      hasPartialRegUpdate(MI->getOpcode()))
    return 0;

  // Determine the alignment of the load.  For the idiom pseudos below the
  // alignment is implied by the (future) constant-pool type.
  unsigned Alignment = 0;
  if (LoadMI->hasOneMemOperand())
    Alignment = (*LoadMI->memoperands_begin())->getAlignment();
  else
    switch (LoadMI->getOpcode()) {
    case X86::AVX2_SETALLONES:
    case X86::AVX_SET0:
      Alignment = 32;
      break;
    case X86::V_SET0:
    case X86::V_SETALLONES:
      Alignment = 16;
      break;
    case X86::FsFLD0SD:
      Alignment = 8;
      break;
    case X86::FsFLD0SS:
      Alignment = 4;
      break;
    default:
      return 0;
    }
  // Folding both operands of a TESTrr: rewrite MI to a CMPri against 0
  // first, then fold the remaining register operand below.
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
    }
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  // Make sure the subregisters match.
  // Otherwise we risk changing the size of the load.
  if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
    return NULL;

  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
  switch (LoadMI->getOpcode()) {
  case X86::V_SET0:
  case X86::V_SETALLONES:
  case X86::AVX2_SETALLONES:
  case X86::AVX_SET0:
  case X86::FsFLD0SD:
  case X86::FsFLD0SS: {
    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // Medium and large mode can't fold loads this way.
    if (TM.getCodeModel() != CodeModel::Small &&
        TM.getCodeModel() != CodeModel::Kernel)
      return NULL;

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (TM.getRelocationModel() == Reloc::PIC_) {
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
        PICBase = X86::RIP;
      else
        // FIXME: PICBase = getGlobalBaseReg(&MF);
        // This doesn't work for several reasons.
        // 1. GlobalBaseReg may have been spilled.
        // 2. It may not be live at MI.
        return NULL;
    }

    // Create a constant-pool entry.  The type must match the width the
    // pseudo defines so the folded load reads the right number of bytes.
    MachineConstantPool &MCP = *MF.getConstantPool();
    Type *Ty;
    unsigned Opc = LoadMI->getOpcode();
    if (Opc == X86::FsFLD0SS)
      Ty = Type::getFloatTy(MF.getFunction()->getContext());
    else if (Opc == X86::FsFLD0SD)
      Ty = Type::getDoubleTy(MF.getFunction()->getContext());
    else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0)
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
    else
      Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);

    bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX2_SETALLONES);
    const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
                                    Constant::getNullValue(Ty);
    unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);

    // Create operands to load from the constant pool entry.
    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
    MOs.push_back(MachineOperand::CreateImm(1));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    break;
  }
  default: {
    if ((LoadMI->getOpcode() == X86::MOVSSrm ||
         LoadMI->getOpcode() == X86::VMOVSSrm) &&
        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
          > 4)
      // These instructions only load 32 bits, we can't fold them if the
      // destination register is wider than 32 bits (4 bytes).
      return NULL;
    if ((LoadMI->getOpcode() == X86::MOVSDrm ||
         LoadMI->getOpcode() == X86::VMOVSDrm) &&
        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
          > 8)
      // These instructions only load 64 bits, we can't fold them if the
      // destination register is wider than 64 bits (8 bytes).
      return NULL;

    // Folding a normal load. Just copy the load's address operands.
    for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
      MOs.push_back(LoadMI->getOperand(i));
    break;
  }
  }
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment);
}
4454193323Sed
4455193323Sed
4456193323Sedbool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
4457193323Sed                                  const SmallVectorImpl<unsigned> &Ops) const {
4458218893Sdim  // Check switch flag
4459193323Sed  if (NoFusing) return 0;
4460193323Sed
4461193323Sed  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
4462193323Sed    switch (MI->getOpcode()) {
4463193323Sed    default: return false;
4464218893Sdim    case X86::TEST8rr:
4465193323Sed    case X86::TEST16rr:
4466193323Sed    case X86::TEST32rr:
4467193323Sed    case X86::TEST64rr:
4468193323Sed      return true;
4469221345Sdim    case X86::ADD32ri:
4470221345Sdim      // FIXME: AsmPrinter doesn't know how to handle
4471221345Sdim      // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
4472221345Sdim      if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
4473221345Sdim        return false;
4474221345Sdim      break;
4475193323Sed    }
4476193323Sed  }
4477193323Sed
4478193323Sed  if (Ops.size() != 1)
4479193323Sed    return false;
4480193323Sed
4481193323Sed  unsigned OpNum = Ops[0];
4482193323Sed  unsigned Opc = MI->getOpcode();
4483193323Sed  unsigned NumOps = MI->getDesc().getNumOperands();
4484193323Sed  bool isTwoAddr = NumOps > 1 &&
4485224145Sdim    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
4486193323Sed
4487193323Sed  // Folding a memory location into the two-address part of a two-address
4488193323Sed  // instruction is different than folding it other places.  It requires
4489193323Sed  // replacing the *two* registers with the memory location.
4490218893Sdim  const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
4491218893Sdim  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
4492193323Sed    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
4493193323Sed  } else if (OpNum == 0) { // If operand 0
4494263509Sdim    if (Opc == X86::MOV32r0)
4495263509Sdim      return true;
4496263509Sdim
4497193323Sed    OpcodeTablePtr = &RegOp2MemOpTable0;
4498193323Sed  } else if (OpNum == 1) {
4499193323Sed    OpcodeTablePtr = &RegOp2MemOpTable1;
4500193323Sed  } else if (OpNum == 2) {
4501193323Sed    OpcodeTablePtr = &RegOp2MemOpTable2;
4502245431Sdim  } else if (OpNum == 3) {
4503245431Sdim    OpcodeTablePtr = &RegOp2MemOpTable3;
4504193323Sed  }
4505218893Sdim
4506218893Sdim  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
4507218893Sdim    return true;
4508252723Sdim  return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
4509193323Sed}
4510193323Sed
/// unfoldMemoryOperand - Expand the folded memory access in MI into separate
/// instructions: an optional reload of Reg before the operation, the
/// register form of the instruction, and an optional spill of Reg after it.
/// The new instructions are appended to NewMIs (not inserted into a block).
/// Returns false if MI has no entry in the unfold table or the requested
/// unfolding cannot be honored.
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const {
  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find(MI->getOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  // The table entry packs the register-form opcode together with flag bits:
  // the index of the memory operands and whether the folded access was a
  // load and/or a store.
  unsigned Opc = I->second.first;
  unsigned Index = I->second.second & TB_INDEX_MASK;
  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
  // Refuse to unfold an access kind this instruction does not actually fold.
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  const MCInstrDesc &MCID = get(Opc);
  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
  if (!MI->hasOneMemOperand() &&
      RC == &X86::VR128RegClass &&
      !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
    // conservatively assume the address is unaligned. That's bad for
    // performance.
    return false;
  // Partition MI's operands into: the memory address operands, implicit
  // register operands, and the explicit operands before/after the address.
  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
  SmallVector<MachineOperand,2> BeforeOps;
  SmallVector<MachineOperand,2> AfterOps;
  SmallVector<MachineOperand,4> ImpOps;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (i >= Index && i < Index + X86::AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
  if (UnfoldLoad) {
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractLoadMemRefs(MI->memoperands_begin(),
                            MI->memoperands_end());
    loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
    if (UnfoldStore) {
      // Address operands cannot be marked isKill: the same address is used
      // again by the store emitted below.
      for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
        MachineOperand &MO = NewMIs[0]->getOperand(i);
        if (MO.isReg())
          MO.setIsKill(false);
      }
    }
  }

  // Emit the data processing instruction.
  MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(MF, DataMI);

  if (FoldedStore)
    MIB.addReg(Reg, RegState::Define);
  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
    MIB.addOperand(BeforeOps[i]);
  if (FoldedLoad)
    MIB.addReg(Reg);
  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
    MIB.addOperand(AfterOps[i]);
  // Re-attach the implicit operands, preserving all register state flags.
  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
    MachineOperand &MO = ImpOps[i];
    MIB.addReg(MO.getReg(),
               getDefRegState(MO.isDef()) |
               RegState::Implicit |
               getKillRegState(MO.isKill()) |
               getDeadRegState(MO.isDead()) |
               getUndefRegState(MO.isUndef()));
  }
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  switch (DataMI->getOpcode()) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP8ri: {
    MachineOperand &MO0 = DataMI->getOperand(0);
    MachineOperand &MO1 = DataMI->getOperand(1);
    if (MO1.getImm() == 0) {
      unsigned NewOpc;
      switch (DataMI->getOpcode()) {
      default: llvm_unreachable("Unreachable!");
      case X86::CMP64ri8:
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri8:
      case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
      case X86::CMP16ri8:
      case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
      case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
      }
      DataMI->setDesc(get(NewOpc));
      MO1.ChangeToRegister(MO0.getReg(), false);
    }
  }
  }
  NewMIs.push_back(DataMI);

  // Emit the store instruction.
  if (UnfoldStore) {
    const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI, MF);
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractStoreMemRefs(MI->memoperands_begin(),
                             MI->memoperands_end());
    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs);
  }

  return true;
}
4635193323Sed
4636193323Sedbool
4637193323SedX86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
4638193323Sed                                  SmallVectorImpl<SDNode*> &NewNodes) const {
4639193323Sed  if (!N->isMachineOpcode())
4640193323Sed    return false;
4641193323Sed
4642218893Sdim  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
4643218893Sdim    MemOp2RegOpTable.find(N->getMachineOpcode());
4644193323Sed  if (I == MemOp2RegOpTable.end())
4645193323Sed    return false;
4646193323Sed  unsigned Opc = I->second.first;
4647226890Sdim  unsigned Index = I->second.second & TB_INDEX_MASK;
4648226890Sdim  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
4649226890Sdim  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
4650224145Sdim  const MCInstrDesc &MCID = get(Opc);
4651245431Sdim  MachineFunction &MF = DAG.getMachineFunction();
4652245431Sdim  const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
4653224145Sdim  unsigned NumDefs = MCID.NumDefs;
4654193323Sed  std::vector<SDValue> AddrOps;
4655193323Sed  std::vector<SDValue> BeforeOps;
4656193323Sed  std::vector<SDValue> AfterOps;
4657263509Sdim  SDLoc dl(N);
4658193323Sed  unsigned NumOps = N->getNumOperands();
4659193323Sed  for (unsigned i = 0; i != NumOps-1; ++i) {
4660193323Sed    SDValue Op = N->getOperand(i);
4661210299Sed    if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
4662193323Sed      AddrOps.push_back(Op);
4663193323Sed    else if (i < Index-NumDefs)
4664193323Sed      BeforeOps.push_back(Op);
4665193323Sed    else if (i > Index-NumDefs)
4666193323Sed      AfterOps.push_back(Op);
4667193323Sed  }
4668193323Sed  SDValue Chain = N->getOperand(NumOps-1);
4669193323Sed  AddrOps.push_back(Chain);
4670193323Sed
4671193323Sed  // Emit the load instruction.
4672193323Sed  SDNode *Load = 0;
4673193323Sed  if (FoldedLoad) {
4674198090Srdivacky    EVT VT = *RC->vt_begin();
4675199481Srdivacky    std::pair<MachineInstr::mmo_iterator,
4676199481Srdivacky              MachineInstr::mmo_iterator> MMOs =
4677199481Srdivacky      MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
4678199481Srdivacky                            cast<MachineSDNode>(N)->memoperands_end());
4679210299Sed    if (!(*MMOs.first) &&
4680210299Sed        RC == &X86::VR128RegClass &&
4681210299Sed        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
4682210299Sed      // Do not introduce a slow unaligned load.
4683210299Sed      return false;
4684226890Sdim    unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
4685226890Sdim    bool isAligned = (*MMOs.first) &&
4686226890Sdim                     (*MMOs.first)->getAlignment() >= Alignment;
4687198090Srdivacky    Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
4688252723Sdim                              VT, MVT::Other, AddrOps);
4689193323Sed    NewNodes.push_back(Load);
4690198090Srdivacky
4691198090Srdivacky    // Preserve memory reference information.
4692198090Srdivacky    cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
4693193323Sed  }
4694193323Sed
4695193323Sed  // Emit the data processing instruction.
4696198090Srdivacky  std::vector<EVT> VTs;
4697193323Sed  const TargetRegisterClass *DstRC = 0;
4698224145Sdim  if (MCID.getNumDefs() > 0) {
4699245431Sdim    DstRC = getRegClass(MCID, 0, &RI, MF);
4700193323Sed    VTs.push_back(*DstRC->vt_begin());
4701193323Sed  }
4702193323Sed  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
4703198090Srdivacky    EVT VT = N->getValueType(i);
4704224145Sdim    if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
4705193323Sed      VTs.push_back(VT);
4706193323Sed  }
4707193323Sed  if (Load)
4708193323Sed    BeforeOps.push_back(SDValue(Load, 0));
4709193323Sed  std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
4710252723Sdim  SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, BeforeOps);
4711193323Sed  NewNodes.push_back(NewNode);
4712193323Sed
4713193323Sed  // Emit the store instruction.
4714193323Sed  if (FoldedStore) {
4715193323Sed    AddrOps.pop_back();
4716193323Sed    AddrOps.push_back(SDValue(NewNode, 0));
4717193323Sed    AddrOps.push_back(Chain);
4718199481Srdivacky    std::pair<MachineInstr::mmo_iterator,
4719199481Srdivacky              MachineInstr::mmo_iterator> MMOs =
4720199481Srdivacky      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
4721199481Srdivacky                             cast<MachineSDNode>(N)->memoperands_end());
4722210299Sed    if (!(*MMOs.first) &&
4723210299Sed        RC == &X86::VR128RegClass &&
4724210299Sed        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
4725210299Sed      // Do not introduce a slow unaligned store.
4726210299Sed      return false;
4727226890Sdim    unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
4728226890Sdim    bool isAligned = (*MMOs.first) &&
4729226890Sdim                     (*MMOs.first)->getAlignment() >= Alignment;
4730198090Srdivacky    SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
4731198090Srdivacky                                                         isAligned, TM),
4732252723Sdim                                       dl, MVT::Other, AddrOps);
4733193323Sed    NewNodes.push_back(Store);
4734198090Srdivacky
4735198090Srdivacky    // Preserve memory reference information.
4736198090Srdivacky    cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
4737193323Sed  }
4738193323Sed
4739193323Sed  return true;
4740193323Sed}
4741193323Sed
4742193323Sedunsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
4743198892Srdivacky                                      bool UnfoldLoad, bool UnfoldStore,
4744198892Srdivacky                                      unsigned *LoadRegIndex) const {
4745218893Sdim  DenseMap<unsigned, std::pair<unsigned,unsigned> >::const_iterator I =
4746218893Sdim    MemOp2RegOpTable.find(Opc);
4747193323Sed  if (I == MemOp2RegOpTable.end())
4748193323Sed    return 0;
4749226890Sdim  bool FoldedLoad = I->second.second & TB_FOLDED_LOAD;
4750226890Sdim  bool FoldedStore = I->second.second & TB_FOLDED_STORE;
4751193323Sed  if (UnfoldLoad && !FoldedLoad)
4752193323Sed    return 0;
4753193323Sed  if (UnfoldStore && !FoldedStore)
4754193323Sed    return 0;
4755198892Srdivacky  if (LoadRegIndex)
4756226890Sdim    *LoadRegIndex = I->second.second & TB_INDEX_MASK;
4757193323Sed  return I->second.first;
4758193323Sed}
4759193323Sed
4760202878Srdivackybool
4761202878SrdivackyX86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
4762202878Srdivacky                                     int64_t &Offset1, int64_t &Offset2) const {
4763202878Srdivacky  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
4764202878Srdivacky    return false;
4765202878Srdivacky  unsigned Opc1 = Load1->getMachineOpcode();
4766202878Srdivacky  unsigned Opc2 = Load2->getMachineOpcode();
4767202878Srdivacky  switch (Opc1) {
4768202878Srdivacky  default: return false;
4769202878Srdivacky  case X86::MOV8rm:
4770202878Srdivacky  case X86::MOV16rm:
4771202878Srdivacky  case X86::MOV32rm:
4772202878Srdivacky  case X86::MOV64rm:
4773202878Srdivacky  case X86::LD_Fp32m:
4774202878Srdivacky  case X86::LD_Fp64m:
4775202878Srdivacky  case X86::LD_Fp80m:
4776202878Srdivacky  case X86::MOVSSrm:
4777202878Srdivacky  case X86::MOVSDrm:
4778202878Srdivacky  case X86::MMX_MOVD64rm:
4779202878Srdivacky  case X86::MMX_MOVQ64rm:
4780202878Srdivacky  case X86::FsMOVAPSrm:
4781202878Srdivacky  case X86::FsMOVAPDrm:
4782202878Srdivacky  case X86::MOVAPSrm:
4783202878Srdivacky  case X86::MOVUPSrm:
4784202878Srdivacky  case X86::MOVAPDrm:
4785202878Srdivacky  case X86::MOVDQArm:
4786202878Srdivacky  case X86::MOVDQUrm:
4787226890Sdim  // AVX load instructions
4788226890Sdim  case X86::VMOVSSrm:
4789226890Sdim  case X86::VMOVSDrm:
4790226890Sdim  case X86::FsVMOVAPSrm:
4791226890Sdim  case X86::FsVMOVAPDrm:
4792226890Sdim  case X86::VMOVAPSrm:
4793226890Sdim  case X86::VMOVUPSrm:
4794226890Sdim  case X86::VMOVAPDrm:
4795226890Sdim  case X86::VMOVDQArm:
4796226890Sdim  case X86::VMOVDQUrm:
4797224145Sdim  case X86::VMOVAPSYrm:
4798224145Sdim  case X86::VMOVUPSYrm:
4799224145Sdim  case X86::VMOVAPDYrm:
4800224145Sdim  case X86::VMOVDQAYrm:
4801224145Sdim  case X86::VMOVDQUYrm:
4802202878Srdivacky    break;
4803202878Srdivacky  }
4804202878Srdivacky  switch (Opc2) {
4805202878Srdivacky  default: return false;
4806202878Srdivacky  case X86::MOV8rm:
4807202878Srdivacky  case X86::MOV16rm:
4808202878Srdivacky  case X86::MOV32rm:
4809202878Srdivacky  case X86::MOV64rm:
4810202878Srdivacky  case X86::LD_Fp32m:
4811202878Srdivacky  case X86::LD_Fp64m:
4812202878Srdivacky  case X86::LD_Fp80m:
4813202878Srdivacky  case X86::MOVSSrm:
4814202878Srdivacky  case X86::MOVSDrm:
4815202878Srdivacky  case X86::MMX_MOVD64rm:
4816202878Srdivacky  case X86::MMX_MOVQ64rm:
4817202878Srdivacky  case X86::FsMOVAPSrm:
4818202878Srdivacky  case X86::FsMOVAPDrm:
4819202878Srdivacky  case X86::MOVAPSrm:
4820202878Srdivacky  case X86::MOVUPSrm:
4821202878Srdivacky  case X86::MOVAPDrm:
4822202878Srdivacky  case X86::MOVDQArm:
4823202878Srdivacky  case X86::MOVDQUrm:
4824226890Sdim  // AVX load instructions
4825226890Sdim  case X86::VMOVSSrm:
4826226890Sdim  case X86::VMOVSDrm:
4827226890Sdim  case X86::FsVMOVAPSrm:
4828226890Sdim  case X86::FsVMOVAPDrm:
4829226890Sdim  case X86::VMOVAPSrm:
4830226890Sdim  case X86::VMOVUPSrm:
4831226890Sdim  case X86::VMOVAPDrm:
4832226890Sdim  case X86::VMOVDQArm:
4833226890Sdim  case X86::VMOVDQUrm:
4834224145Sdim  case X86::VMOVAPSYrm:
4835224145Sdim  case X86::VMOVUPSYrm:
4836224145Sdim  case X86::VMOVAPDYrm:
4837224145Sdim  case X86::VMOVDQAYrm:
4838224145Sdim  case X86::VMOVDQUYrm:
4839202878Srdivacky    break;
4840202878Srdivacky  }
4841202878Srdivacky
4842202878Srdivacky  // Check if chain operands and base addresses match.
4843202878Srdivacky  if (Load1->getOperand(0) != Load2->getOperand(0) ||
4844202878Srdivacky      Load1->getOperand(5) != Load2->getOperand(5))
4845202878Srdivacky    return false;
4846202878Srdivacky  // Segment operands should match as well.
4847202878Srdivacky  if (Load1->getOperand(4) != Load2->getOperand(4))
4848202878Srdivacky    return false;
4849202878Srdivacky  // Scale should be 1, Index should be Reg0.
4850202878Srdivacky  if (Load1->getOperand(1) == Load2->getOperand(1) &&
4851202878Srdivacky      Load1->getOperand(2) == Load2->getOperand(2)) {
4852202878Srdivacky    if (cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue() != 1)
4853202878Srdivacky      return false;
4854202878Srdivacky
4855202878Srdivacky    // Now let's examine the displacements.
4856202878Srdivacky    if (isa<ConstantSDNode>(Load1->getOperand(3)) &&
4857202878Srdivacky        isa<ConstantSDNode>(Load2->getOperand(3))) {
4858202878Srdivacky      Offset1 = cast<ConstantSDNode>(Load1->getOperand(3))->getSExtValue();
4859202878Srdivacky      Offset2 = cast<ConstantSDNode>(Load2->getOperand(3))->getSExtValue();
4860202878Srdivacky      return true;
4861202878Srdivacky    }
4862202878Srdivacky  }
4863202878Srdivacky  return false;
4864202878Srdivacky}
4865202878Srdivacky
4866202878Srdivackybool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
4867202878Srdivacky                                           int64_t Offset1, int64_t Offset2,
4868202878Srdivacky                                           unsigned NumLoads) const {
4869202878Srdivacky  assert(Offset2 > Offset1);
4870202878Srdivacky  if ((Offset2 - Offset1) / 8 > 64)
4871202878Srdivacky    return false;
4872202878Srdivacky
4873202878Srdivacky  unsigned Opc1 = Load1->getMachineOpcode();
4874202878Srdivacky  unsigned Opc2 = Load2->getMachineOpcode();
4875202878Srdivacky  if (Opc1 != Opc2)
4876202878Srdivacky    return false;  // FIXME: overly conservative?
4877202878Srdivacky
4878202878Srdivacky  switch (Opc1) {
4879202878Srdivacky  default: break;
4880202878Srdivacky  case X86::LD_Fp32m:
4881202878Srdivacky  case X86::LD_Fp64m:
4882202878Srdivacky  case X86::LD_Fp80m:
4883202878Srdivacky  case X86::MMX_MOVD64rm:
4884202878Srdivacky  case X86::MMX_MOVQ64rm:
4885202878Srdivacky    return false;
4886202878Srdivacky  }
4887202878Srdivacky
4888202878Srdivacky  EVT VT = Load1->getValueType(0);
4889202878Srdivacky  switch (VT.getSimpleVT().SimpleTy) {
4890210299Sed  default:
4891202878Srdivacky    // XMM registers. In 64-bit mode we can be a bit more aggressive since we
4892202878Srdivacky    // have 16 of them to play with.
4893202878Srdivacky    if (TM.getSubtargetImpl()->is64Bit()) {
4894202878Srdivacky      if (NumLoads >= 3)
4895202878Srdivacky        return false;
4896210299Sed    } else if (NumLoads) {
4897202878Srdivacky      return false;
4898210299Sed    }
4899202878Srdivacky    break;
4900202878Srdivacky  case MVT::i8:
4901202878Srdivacky  case MVT::i16:
4902202878Srdivacky  case MVT::i32:
4903202878Srdivacky  case MVT::i64:
4904202878Srdivacky  case MVT::f32:
4905202878Srdivacky  case MVT::f64:
4906202878Srdivacky    if (NumLoads)
4907202878Srdivacky      return false;
4908210299Sed    break;
4909202878Srdivacky  }
4910202878Srdivacky
4911202878Srdivacky  return true;
4912202878Srdivacky}
4913202878Srdivacky
/// shouldScheduleAdjacent - Return true if First and Second should be kept
/// adjacent by the scheduler so the hardware can macro-fuse them (a
/// flag-setting instruction followed by a conditional jump).
bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
                                          MachineInstr *Second) const {
  // Check if this processor supports macro-fusion. Since this is a minor
  // heuristic, we haven't specifically reserved a feature. hasAVX is a decent
  // proxy for SandyBridge+.
  if (!TM.getSubtarget<X86Subtarget>().hasAVX())
    return false;

  // Classify the branch by which groups of flag-producing instructions it
  // can fuse with: FuseInc is the widest group, FuseTest the narrowest.
  enum {
    FuseTest,
    FuseCmp,
    FuseInc
  } FuseKind;

  switch(Second->getOpcode()) {
  default:
    return false;
  case X86::JE_4:
  case X86::JNE_4:
  case X86::JL_4:
  case X86::JLE_4:
  case X86::JG_4:
  case X86::JGE_4:
    FuseKind = FuseInc;
    break;
  case X86::JB_4:
  case X86::JBE_4:
  case X86::JA_4:
  case X86::JAE_4:
    FuseKind = FuseCmp;
    break;
  case X86::JS_4:
  case X86::JNS_4:
  case X86::JP_4:
  case X86::JNP_4:
  case X86::JO_4:
  case X86::JNO_4:
    FuseKind = FuseTest;
    break;
  }
  switch (First->getOpcode()) {
  default:
    return false;
  // TEST and AND fuse with every branch group classified above.
  case X86::TEST8rr:
  case X86::TEST16rr:
  case X86::TEST32rr:
  case X86::TEST64rr:
  case X86::TEST8ri:
  case X86::TEST16ri:
  case X86::TEST32ri:
  case X86::TEST32i32:
  case X86::TEST64i32:
  case X86::TEST64ri32:
  case X86::TEST8rm:
  case X86::TEST16rm:
  case X86::TEST32rm:
  case X86::TEST64rm:
  case X86::AND16i16:
  case X86::AND16ri:
  case X86::AND16ri8:
  case X86::AND16rm:
  case X86::AND16rr:
  case X86::AND32i32:
  case X86::AND32ri:
  case X86::AND32ri8:
  case X86::AND32rm:
  case X86::AND32rr:
  case X86::AND64i32:
  case X86::AND64ri32:
  case X86::AND64ri8:
  case X86::AND64rm:
  case X86::AND64rr:
  case X86::AND8i8:
  case X86::AND8ri:
  case X86::AND8rm:
  case X86::AND8rr:
    return true;
  // CMP, ADD and SUB fuse with the FuseCmp and FuseInc branch groups only.
  case X86::CMP16i16:
  case X86::CMP16ri:
  case X86::CMP16ri8:
  case X86::CMP16rm:
  case X86::CMP16rr:
  case X86::CMP32i32:
  case X86::CMP32ri:
  case X86::CMP32ri8:
  case X86::CMP32rm:
  case X86::CMP32rr:
  case X86::CMP64i32:
  case X86::CMP64ri32:
  case X86::CMP64ri8:
  case X86::CMP64rm:
  case X86::CMP64rr:
  case X86::CMP8i8:
  case X86::CMP8ri:
  case X86::CMP8rm:
  case X86::CMP8rr:
  case X86::ADD16i16:
  case X86::ADD16ri:
  case X86::ADD16ri8:
  case X86::ADD16ri8_DB:
  case X86::ADD16ri_DB:
  case X86::ADD16rm:
  case X86::ADD16rr:
  case X86::ADD16rr_DB:
  case X86::ADD32i32:
  case X86::ADD32ri:
  case X86::ADD32ri8:
  case X86::ADD32ri8_DB:
  case X86::ADD32ri_DB:
  case X86::ADD32rm:
  case X86::ADD32rr:
  case X86::ADD32rr_DB:
  case X86::ADD64i32:
  case X86::ADD64ri32:
  case X86::ADD64ri32_DB:
  case X86::ADD64ri8:
  case X86::ADD64ri8_DB:
  case X86::ADD64rm:
  case X86::ADD64rr:
  case X86::ADD64rr_DB:
  case X86::ADD8i8:
  case X86::ADD8mi:
  case X86::ADD8mr:
  case X86::ADD8ri:
  case X86::ADD8rm:
  case X86::ADD8rr:
  case X86::SUB16i16:
  case X86::SUB16ri:
  case X86::SUB16ri8:
  case X86::SUB16rm:
  case X86::SUB16rr:
  case X86::SUB32i32:
  case X86::SUB32ri:
  case X86::SUB32ri8:
  case X86::SUB32rm:
  case X86::SUB32rr:
  case X86::SUB64i32:
  case X86::SUB64ri32:
  case X86::SUB64ri8:
  case X86::SUB64rm:
  case X86::SUB64rr:
  case X86::SUB8i8:
  case X86::SUB8ri:
  case X86::SUB8rm:
  case X86::SUB8rr:
    return FuseKind == FuseCmp || FuseKind == FuseInc;
  // INC and DEC fuse only with the FuseInc branch group.
  case X86::INC16r:
  case X86::INC32r:
  case X86::INC64_16r:
  case X86::INC64_32r:
  case X86::INC64r:
  case X86::INC8r:
  case X86::DEC16r:
  case X86::DEC32r:
  case X86::DEC64_16r:
  case X86::DEC64_32r:
  case X86::DEC64r:
  case X86::DEC8r:
    return FuseKind == FuseInc;
  }
}
5075263509Sdim
5076193323Sedbool X86InstrInfo::
5077193323SedReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
5078193323Sed  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
5079193323Sed  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
5080193323Sed  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
5081193323Sed    return true;
5082193323Sed  Cond[0].setImm(GetOppositeBranchCondition(CC));
5083193323Sed  return false;
5084193323Sed}
5085193323Sed
5086193323Sedbool X86InstrInfo::
5087193323SedisSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
5088193323Sed  // FIXME: Return false for x87 stack register classes for now. We can't
5089193323Sed  // allow any loads of these registers before FpGet_ST0_80.
5090193323Sed  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
5091193323Sed           RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
5092193323Sed}
5093193323Sed
5094193323Sed/// getGlobalBaseReg - Return a virtual register initialized with the
5095193323Sed/// the global base register value. Output instructions required to
5096193323Sed/// initialize the register in the function entry block, if necessary.
5097193323Sed///
5098210299Sed/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
5099210299Sed///
5100193323Sedunsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
5101193323Sed  assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
5102193323Sed         "X86-64 PIC uses RIP relative addressing");
5103193323Sed
5104193323Sed  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
5105193323Sed  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
5106193323Sed  if (GlobalBaseReg != 0)
5107193323Sed    return GlobalBaseReg;
5108193323Sed
5109210299Sed  // Create the register. The code to initialize it is inserted
5110210299Sed  // later, by the CGBR pass (below).
5111193323Sed  MachineRegisterInfo &RegInfo = MF->getRegInfo();
5112245431Sdim  GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
5113193323Sed  X86FI->setGlobalBaseReg(GlobalBaseReg);
5114193323Sed  return GlobalBaseReg;
5115193323Sed}
5116206083Srdivacky
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.  Each row lists the same operation encoded for the three
// execution domains (PackedSingle, PackedDouble, PackedInt), in that column
// order.
static const uint16_t ReplaceableInstrs[][3] = {
  //PackedSingle     PackedDouble    PackedInt
  { X86::MOVAPSmr,   X86::MOVAPDmr,  X86::MOVDQAmr  },
  { X86::MOVAPSrm,   X86::MOVAPDrm,  X86::MOVDQArm  },
  { X86::MOVAPSrr,   X86::MOVAPDrr,  X86::MOVDQArr  },
  { X86::MOVUPSmr,   X86::MOVUPDmr,  X86::MOVDQUmr  },
  { X86::MOVUPSrm,   X86::MOVUPDrm,  X86::MOVDQUrm  },
  { X86::MOVNTPSmr,  X86::MOVNTPDmr, X86::MOVNTDQmr },
  { X86::ANDNPSrm,   X86::ANDNPDrm,  X86::PANDNrm   },
  { X86::ANDNPSrr,   X86::ANDNPDrr,  X86::PANDNrr   },
  { X86::ANDPSrm,    X86::ANDPDrm,   X86::PANDrm    },
  { X86::ANDPSrr,    X86::ANDPDrr,   X86::PANDrr    },
  { X86::ORPSrm,     X86::ORPDrm,    X86::PORrm     },
  { X86::ORPSrr,     X86::ORPDrr,    X86::PORrr     },
  { X86::XORPSrm,    X86::XORPDrm,   X86::PXORrm    },
  { X86::XORPSrr,    X86::XORPDrr,   X86::PXORrr    },
  // AVX 128-bit support
  { X86::VMOVAPSmr,  X86::VMOVAPDmr,  X86::VMOVDQAmr  },
  { X86::VMOVAPSrm,  X86::VMOVAPDrm,  X86::VMOVDQArm  },
  { X86::VMOVAPSrr,  X86::VMOVAPDrr,  X86::VMOVDQArr  },
  { X86::VMOVUPSmr,  X86::VMOVUPDmr,  X86::VMOVDQUmr  },
  { X86::VMOVUPSrm,  X86::VMOVUPDrm,  X86::VMOVDQUrm  },
  { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
  { X86::VANDNPSrm,  X86::VANDNPDrm,  X86::VPANDNrm   },
  { X86::VANDNPSrr,  X86::VANDNPDrr,  X86::VPANDNrr   },
  { X86::VANDPSrm,   X86::VANDPDrm,   X86::VPANDrm    },
  { X86::VANDPSrr,   X86::VANDPDrr,   X86::VPANDrr    },
  { X86::VORPSrm,    X86::VORPDrm,    X86::VPORrm     },
  { X86::VORPSrr,    X86::VORPDrr,    X86::VPORrr     },
  { X86::VXORPSrm,   X86::VXORPDrm,   X86::VPXORrm    },
  { X86::VXORPSrr,   X86::VXORPDrr,   X86::VPXORrr    },
  // AVX 256-bit support
  { X86::VMOVAPSYmr,   X86::VMOVAPDYmr,   X86::VMOVDQAYmr  },
  { X86::VMOVAPSYrm,   X86::VMOVAPDYrm,   X86::VMOVDQAYrm  },
  { X86::VMOVAPSYrr,   X86::VMOVAPDYrr,   X86::VMOVDQAYrr  },
  { X86::VMOVUPSYmr,   X86::VMOVUPDYmr,   X86::VMOVDQUYmr  },
  { X86::VMOVUPSYrm,   X86::VMOVUPDYrm,   X86::VMOVDQUYrm  },
  { X86::VMOVNTPSYmr,  X86::VMOVNTPDYmr,  X86::VMOVNTDQYmr }
};
5159206083Srdivacky
5160235633Sdimstatic const uint16_t ReplaceableInstrsAVX2[][3] = {
5161235633Sdim  //PackedSingle       PackedDouble       PackedInt
5162235633Sdim  { X86::VANDNPSYrm,   X86::VANDNPDYrm,   X86::VPANDNYrm   },
5163235633Sdim  { X86::VANDNPSYrr,   X86::VANDNPDYrr,   X86::VPANDNYrr   },
5164235633Sdim  { X86::VANDPSYrm,    X86::VANDPDYrm,    X86::VPANDYrm    },
5165235633Sdim  { X86::VANDPSYrr,    X86::VANDPDYrr,    X86::VPANDYrr    },
5166235633Sdim  { X86::VORPSYrm,     X86::VORPDYrm,     X86::VPORYrm     },
5167235633Sdim  { X86::VORPSYrr,     X86::VORPDYrr,     X86::VPORYrr     },
5168235633Sdim  { X86::VXORPSYrm,    X86::VXORPDYrm,    X86::VPXORYrm    },
5169235633Sdim  { X86::VXORPSYrr,    X86::VXORPDYrr,    X86::VPXORYrr    },
5170235633Sdim  { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
5171235633Sdim  { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
5172235633Sdim  { X86::VINSERTF128rm,  X86::VINSERTF128rm,  X86::VINSERTI128rm },
5173235633Sdim  { X86::VINSERTF128rr,  X86::VINSERTF128rr,  X86::VINSERTI128rr },
5174235633Sdim  { X86::VPERM2F128rm,   X86::VPERM2F128rm,   X86::VPERM2I128rm },
5175235633Sdim  { X86::VPERM2F128rr,   X86::VPERM2F128rr,   X86::VPERM2I128rr }
5176235633Sdim};
5177235633Sdim
5178206083Srdivacky// FIXME: Some shuffle and unpack instructions have equivalents in different
5179206083Srdivacky// domains, but they require a bit more work than just switching opcodes.
5180206083Srdivacky
5181235633Sdimstatic const uint16_t *lookup(unsigned opcode, unsigned domain) {
5182206083Srdivacky  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
5183206083Srdivacky    if (ReplaceableInstrs[i][domain-1] == opcode)
5184206083Srdivacky      return ReplaceableInstrs[i];
5185206083Srdivacky  return 0;
5186206083Srdivacky}
5187206083Srdivacky
5188235633Sdimstatic const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
5189235633Sdim  for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
5190235633Sdim    if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
5191235633Sdim      return ReplaceableInstrsAVX2[i];
5192235633Sdim  return 0;
5193235633Sdim}
5194235633Sdim
5195206083Srdivackystd::pair<uint16_t, uint16_t>
5196226890SdimX86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
5197206083Srdivacky  uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
5198235633Sdim  bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
5199235633Sdim  uint16_t validDomains = 0;
5200235633Sdim  if (domain && lookup(MI->getOpcode(), domain))
5201235633Sdim    validDomains = 0xe;
5202235633Sdim  else if (domain && lookupAVX2(MI->getOpcode(), domain))
5203235633Sdim    validDomains = hasAVX2 ? 0xe : 0x6;
5204235633Sdim  return std::make_pair(domain, validDomains);
5205206083Srdivacky}
5206206083Srdivacky
5207226890Sdimvoid X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
5208206083Srdivacky  assert(Domain>0 && Domain<4 && "Invalid execution domain");
5209206083Srdivacky  uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
5210206083Srdivacky  assert(dom && "Not an SSE instruction");
5211235633Sdim  const uint16_t *table = lookup(MI->getOpcode(), dom);
5212235633Sdim  if (!table) { // try the other table
5213235633Sdim    assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
5214235633Sdim           "256-bit vector operations only available in AVX2");
5215235633Sdim    table = lookupAVX2(MI->getOpcode(), dom);
5216235633Sdim  }
5217206083Srdivacky  assert(table && "Cannot change domain");
5218206083Srdivacky  MI->setDesc(get(table[Domain-1]));
5219206083Srdivacky}
5220207618Srdivacky
5221207618Srdivacky/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
5222207618Srdivackyvoid X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
5223207618Srdivacky  NopInst.setOpcode(X86::NOOP);
5224207618Srdivacky}
5225207618Srdivacky
5226221345Sdimbool X86InstrInfo::isHighLatencyDef(int opc) const {
5227221345Sdim  switch (opc) {
5228218893Sdim  default: return false;
5229218893Sdim  case X86::DIVSDrm:
5230218893Sdim  case X86::DIVSDrm_Int:
5231218893Sdim  case X86::DIVSDrr:
5232218893Sdim  case X86::DIVSDrr_Int:
5233218893Sdim  case X86::DIVSSrm:
5234218893Sdim  case X86::DIVSSrm_Int:
5235218893Sdim  case X86::DIVSSrr:
5236218893Sdim  case X86::DIVSSrr_Int:
5237218893Sdim  case X86::SQRTPDm:
5238218893Sdim  case X86::SQRTPDr:
5239218893Sdim  case X86::SQRTPSm:
5240218893Sdim  case X86::SQRTPSr:
5241218893Sdim  case X86::SQRTSDm:
5242218893Sdim  case X86::SQRTSDm_Int:
5243218893Sdim  case X86::SQRTSDr:
5244218893Sdim  case X86::SQRTSDr_Int:
5245218893Sdim  case X86::SQRTSSm:
5246218893Sdim  case X86::SQRTSSm_Int:
5247218893Sdim  case X86::SQRTSSr:
5248218893Sdim  case X86::SQRTSSr_Int:
5249226890Sdim  // AVX instructions with high latency
5250226890Sdim  case X86::VDIVSDrm:
5251226890Sdim  case X86::VDIVSDrm_Int:
5252226890Sdim  case X86::VDIVSDrr:
5253226890Sdim  case X86::VDIVSDrr_Int:
5254226890Sdim  case X86::VDIVSSrm:
5255226890Sdim  case X86::VDIVSSrm_Int:
5256226890Sdim  case X86::VDIVSSrr:
5257226890Sdim  case X86::VDIVSSrr_Int:
5258226890Sdim  case X86::VSQRTPDm:
5259226890Sdim  case X86::VSQRTPDr:
5260226890Sdim  case X86::VSQRTPSm:
5261226890Sdim  case X86::VSQRTPSr:
5262226890Sdim  case X86::VSQRTSDm:
5263226890Sdim  case X86::VSQRTSDm_Int:
5264226890Sdim  case X86::VSQRTSDr:
5265226890Sdim  case X86::VSQRTSSm:
5266226890Sdim  case X86::VSQRTSSm_Int:
5267226890Sdim  case X86::VSQRTSSr:
5268263509Sdim  case X86::VSQRTPDZrm:
5269263509Sdim  case X86::VSQRTPDZrr:
5270263509Sdim  case X86::VSQRTPSZrm:
5271263509Sdim  case X86::VSQRTPSZrr:
5272263509Sdim  case X86::VSQRTSDZm:
5273263509Sdim  case X86::VSQRTSDZm_Int:
5274263509Sdim  case X86::VSQRTSDZr:
5275263509Sdim  case X86::VSQRTSSZm_Int:
5276263509Sdim  case X86::VSQRTSSZr:
5277263509Sdim  case X86::VSQRTSSZm:
5278263509Sdim  case X86::VDIVSDZrm:
5279263509Sdim  case X86::VDIVSDZrr:
5280263509Sdim  case X86::VDIVSSZrm:
5281263509Sdim  case X86::VDIVSSZrr:
5282263509Sdim
5283263509Sdim  case X86::VGATHERQPSZrm:
5284263509Sdim  case X86::VGATHERQPDZrm:
5285263509Sdim  case X86::VGATHERDPDZrm:
5286263509Sdim  case X86::VGATHERDPSZrm:
5287263509Sdim  case X86::VPGATHERQDZrm:
5288263509Sdim  case X86::VPGATHERQQZrm:
5289263509Sdim  case X86::VPGATHERDDZrm:
5290263509Sdim  case X86::VPGATHERDQZrm:
5291263509Sdim  case X86::VSCATTERQPDZmr:
5292263509Sdim  case X86::VSCATTERQPSZmr:
5293263509Sdim  case X86::VSCATTERDPDZmr:
5294263509Sdim  case X86::VSCATTERDPSZmr:
5295263509Sdim  case X86::VPSCATTERQDZmr:
5296263509Sdim  case X86::VPSCATTERQQZmr:
5297263509Sdim  case X86::VPSCATTERDDZmr:
5298263509Sdim  case X86::VPSCATTERDQZmr:
5299218893Sdim    return true;
5300218893Sdim  }
5301218893Sdim}
5302218893Sdim
5303221345Sdimbool X86InstrInfo::
5304221345SdimhasHighOperandLatency(const InstrItineraryData *ItinData,
5305221345Sdim                      const MachineRegisterInfo *MRI,
5306221345Sdim                      const MachineInstr *DefMI, unsigned DefIdx,
5307221345Sdim                      const MachineInstr *UseMI, unsigned UseIdx) const {
5308221345Sdim  return isHighLatencyDef(DefMI->getOpcode());
5309221345Sdim}
5310221345Sdim
5311210299Sednamespace {
5312210299Sed  /// CGBR - Create Global Base Reg pass. This initializes the PIC
5313210299Sed  /// global base register for x86-32.
5314210299Sed  struct CGBR : public MachineFunctionPass {
5315210299Sed    static char ID;
5316212904Sdim    CGBR() : MachineFunctionPass(ID) {}
5317210299Sed
5318210299Sed    virtual bool runOnMachineFunction(MachineFunction &MF) {
5319210299Sed      const X86TargetMachine *TM =
5320210299Sed        static_cast<const X86TargetMachine *>(&MF.getTarget());
5321210299Sed
5322210299Sed      assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
5323210299Sed             "X86-64 PIC uses RIP relative addressing");
5324210299Sed
5325210299Sed      // Only emit a global base reg in PIC mode.
5326210299Sed      if (TM->getRelocationModel() != Reloc::PIC_)
5327210299Sed        return false;
5328210299Sed
5329218893Sdim      X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
5330218893Sdim      unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
5331218893Sdim
5332218893Sdim      // If we didn't need a GlobalBaseReg, don't insert code.
5333218893Sdim      if (GlobalBaseReg == 0)
5334218893Sdim        return false;
5335218893Sdim
5336210299Sed      // Insert the set of GlobalBaseReg into the first MBB of the function
5337210299Sed      MachineBasicBlock &FirstMBB = MF.front();
5338210299Sed      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
5339210299Sed      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
5340210299Sed      MachineRegisterInfo &RegInfo = MF.getRegInfo();
5341210299Sed      const X86InstrInfo *TII = TM->getInstrInfo();
5342210299Sed
5343210299Sed      unsigned PC;
5344210299Sed      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
5345245431Sdim        PC = RegInfo.createVirtualRegister(&X86::GR32RegClass);
5346210299Sed      else
5347218893Sdim        PC = GlobalBaseReg;
5348218893Sdim
5349210299Sed      // Operand of MovePCtoStack is completely ignored by asm printer. It's
5350210299Sed      // only used in JIT code emission as displacement to pc.
5351210299Sed      BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
5352218893Sdim
5353210299Sed      // If we're using vanilla 'GOT' PIC style, we should use relative addressing
5354210299Sed      // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
5355210299Sed      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
5356210299Sed        // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
5357210299Sed        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
5358210299Sed          .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
5359210299Sed                                        X86II::MO_GOT_ABSOLUTE_ADDRESS);
5360210299Sed      }
5361210299Sed
5362210299Sed      return true;
5363210299Sed    }
5364210299Sed
5365210299Sed    virtual const char *getPassName() const {
5366210299Sed      return "X86 PIC Global Base Reg Initialization";
5367210299Sed    }
5368210299Sed
5369210299Sed    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
5370210299Sed      AU.setPreservesCFG();
5371210299Sed      MachineFunctionPass::getAnalysisUsage(AU);
5372210299Sed    }
5373210299Sed  };
5374210299Sed}
5375210299Sed
5376210299Sedchar CGBR::ID = 0;
5377210299SedFunctionPass*
5378210299Sedllvm::createGlobalBaseRegPass() { return new CGBR(); }
5379245431Sdim
5380245431Sdimnamespace {
5381245431Sdim  struct LDTLSCleanup : public MachineFunctionPass {
5382245431Sdim    static char ID;
5383245431Sdim    LDTLSCleanup() : MachineFunctionPass(ID) {}
5384245431Sdim
5385245431Sdim    virtual bool runOnMachineFunction(MachineFunction &MF) {
5386245431Sdim      X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
5387245431Sdim      if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
5388245431Sdim        // No point folding accesses if there isn't at least two.
5389245431Sdim        return false;
5390245431Sdim      }
5391245431Sdim
5392245431Sdim      MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
5393245431Sdim      return VisitNode(DT->getRootNode(), 0);
5394245431Sdim    }
5395245431Sdim
5396245431Sdim    // Visit the dominator subtree rooted at Node in pre-order.
5397245431Sdim    // If TLSBaseAddrReg is non-null, then use that to replace any
5398245431Sdim    // TLS_base_addr instructions. Otherwise, create the register
5399245431Sdim    // when the first such instruction is seen, and then use it
5400245431Sdim    // as we encounter more instructions.
5401245431Sdim    bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
5402245431Sdim      MachineBasicBlock *BB = Node->getBlock();
5403245431Sdim      bool Changed = false;
5404245431Sdim
5405245431Sdim      // Traverse the current block.
5406245431Sdim      for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
5407245431Sdim           ++I) {
5408245431Sdim        switch (I->getOpcode()) {
5409245431Sdim          case X86::TLS_base_addr32:
5410245431Sdim          case X86::TLS_base_addr64:
5411245431Sdim            if (TLSBaseAddrReg)
5412245431Sdim              I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
5413245431Sdim            else
5414245431Sdim              I = SetRegister(I, &TLSBaseAddrReg);
5415245431Sdim            Changed = true;
5416245431Sdim            break;
5417245431Sdim          default:
5418245431Sdim            break;
5419245431Sdim        }
5420245431Sdim      }
5421245431Sdim
5422245431Sdim      // Visit the children of this block in the dominator tree.
5423245431Sdim      for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
5424245431Sdim           I != E; ++I) {
5425245431Sdim        Changed |= VisitNode(*I, TLSBaseAddrReg);
5426245431Sdim      }
5427245431Sdim
5428245431Sdim      return Changed;
5429245431Sdim    }
5430245431Sdim
5431245431Sdim    // Replace the TLS_base_addr instruction I with a copy from
5432245431Sdim    // TLSBaseAddrReg, returning the new instruction.
5433245431Sdim    MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
5434245431Sdim                                         unsigned TLSBaseAddrReg) {
5435245431Sdim      MachineFunction *MF = I->getParent()->getParent();
5436245431Sdim      const X86TargetMachine *TM =
5437245431Sdim          static_cast<const X86TargetMachine *>(&MF->getTarget());
5438245431Sdim      const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
5439245431Sdim      const X86InstrInfo *TII = TM->getInstrInfo();
5440245431Sdim
5441245431Sdim      // Insert a Copy from TLSBaseAddrReg to RAX/EAX.
5442245431Sdim      MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
5443245431Sdim                                   TII->get(TargetOpcode::COPY),
5444245431Sdim                                   is64Bit ? X86::RAX : X86::EAX)
5445245431Sdim                                   .addReg(TLSBaseAddrReg);
5446245431Sdim
5447245431Sdim      // Erase the TLS_base_addr instruction.
5448245431Sdim      I->eraseFromParent();
5449245431Sdim
5450245431Sdim      return Copy;
5451245431Sdim    }
5452245431Sdim
5453245431Sdim    // Create a virtal register in *TLSBaseAddrReg, and populate it by
5454245431Sdim    // inserting a copy instruction after I. Returns the new instruction.
5455245431Sdim    MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
5456245431Sdim      MachineFunction *MF = I->getParent()->getParent();
5457245431Sdim      const X86TargetMachine *TM =
5458245431Sdim          static_cast<const X86TargetMachine *>(&MF->getTarget());
5459245431Sdim      const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
5460245431Sdim      const X86InstrInfo *TII = TM->getInstrInfo();
5461245431Sdim
5462245431Sdim      // Create a virtual register for the TLS base address.
5463245431Sdim      MachineRegisterInfo &RegInfo = MF->getRegInfo();
5464245431Sdim      *TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
5465245431Sdim                                                      ? &X86::GR64RegClass
5466245431Sdim                                                      : &X86::GR32RegClass);
5467245431Sdim
5468245431Sdim      // Insert a copy from RAX/EAX to TLSBaseAddrReg.
5469245431Sdim      MachineInstr *Next = I->getNextNode();
5470245431Sdim      MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
5471245431Sdim                                   TII->get(TargetOpcode::COPY),
5472245431Sdim                                   *TLSBaseAddrReg)
5473245431Sdim                                   .addReg(is64Bit ? X86::RAX : X86::EAX);
5474245431Sdim
5475245431Sdim      return Copy;
5476245431Sdim    }
5477245431Sdim
5478245431Sdim    virtual const char *getPassName() const {
5479245431Sdim      return "Local Dynamic TLS Access Clean-up";
5480245431Sdim    }
5481245431Sdim
5482245431Sdim    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
5483245431Sdim      AU.setPreservesCFG();
5484245431Sdim      AU.addRequired<MachineDominatorTree>();
5485245431Sdim      MachineFunctionPass::getAnalysisUsage(AU);
5486245431Sdim    }
5487245431Sdim  };
5488245431Sdim}
5489245431Sdim
5490245431Sdimchar LDTLSCleanup::ID = 0;
5491245431SdimFunctionPass*
5492245431Sdimllvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
5493