X86Subtarget.cpp revision 212904
1198157Srrs//===-- X86Subtarget.cpp - X86 Subtarget Information ------------*- C++ -*-===//
2198157Srrs//
3198157Srrs//                     The LLVM Compiler Infrastructure
4198157Srrs//
5198157Srrs// This file is distributed under the University of Illinois Open Source
6198157Srrs// License. See LICENSE.TXT for details.
7198157Srrs//
8198157Srrs//===----------------------------------------------------------------------===//
9198157Srrs//
10198157Srrs// This file implements the X86 specific subclass of TargetSubtarget.
11198157Srrs//
12198157Srrs//===----------------------------------------------------------------------===//
13198157Srrs
14198157Srrs#define DEBUG_TYPE "subtarget"
15198157Srrs#include "X86Subtarget.h"
16198157Srrs#include "X86InstrInfo.h"
17198157Srrs#include "X86GenSubtarget.inc"
18198157Srrs#include "llvm/GlobalValue.h"
19198157Srrs#include "llvm/Support/Debug.h"
20198157Srrs#include "llvm/Support/raw_ostream.h"
21198157Srrs#include "llvm/System/Host.h"
22198157Srrs#include "llvm/Target/TargetMachine.h"
23198157Srrs#include "llvm/Target/TargetOptions.h"
24198157Srrs#include "llvm/ADT/SmallVector.h"
25198157Srrsusing namespace llvm;
26198157Srrs
27198157Srrs#if defined(_MSC_VER)
28198157Srrs#include <intrin.h>
29198157Srrs#endif
30198157Srrs
31198157Srrs/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
32198157Srrs/// current subtarget according to how we should reference it in a non-pcrel
33198157Srrs/// context.
34198157Srrsunsigned char X86Subtarget::
35198157SrrsClassifyBlockAddressReference() const {
36198157Srrs  if (isPICStyleGOT())    // 32-bit ELF targets.
37198157Srrs    return X86II::MO_GOTOFF;
38198157Srrs
39198157Srrs  if (isPICStyleStubPIC())   // Darwin/32 in PIC mode.
40198157Srrs    return X86II::MO_PIC_BASE_OFFSET;
41198157Srrs
42198157Srrs  // Direct static reference to label.
43198157Srrs  return X86II::MO_NO_FLAG;
44198157Srrs}
45198157Srrs
46198157Srrs/// ClassifyGlobalReference - Classify a global variable reference for the
47198157Srrs/// current subtarget according to how we should reference it in a non-pcrel
48198157Srrs/// context.
49198157Srrsunsigned char X86Subtarget::
50198157SrrsClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
51198157Srrs  // DLLImport only exists on windows, it is implemented as a load from a
52198157Srrs  // DLLIMPORT stub.
53198157Srrs  if (GV->hasDLLImportLinkage())
54198157Srrs    return X86II::MO_DLLIMPORT;
55198157Srrs
56198157Srrs  // Determine whether this is a reference to a definition or a declaration.
57198157Srrs  // Materializable GVs (in JIT lazy compilation mode) do not require an extra
58198157Srrs  // load from stub.
59198157Srrs  bool isDecl = GV->hasAvailableExternallyLinkage();
60198157Srrs  if (GV->isDeclaration() && !GV->isMaterializable())
61198157Srrs    isDecl = true;
62198157Srrs
63198157Srrs  // X86-64 in PIC mode.
64198157Srrs  if (isPICStyleRIPRel()) {
65198157Srrs    // Large model never uses stubs.
66198157Srrs    if (TM.getCodeModel() == CodeModel::Large)
67198157Srrs      return X86II::MO_NO_FLAG;
68198157Srrs
69198157Srrs    if (isTargetDarwin()) {
70198157Srrs      // If symbol visibility is hidden, the extra load is not needed if
71198157Srrs      // target is x86-64 or the symbol is definitely defined in the current
72198157Srrs      // translation unit.
73198157Srrs      if (GV->hasDefaultVisibility() &&
74198157Srrs          (isDecl || GV->isWeakForLinker()))
75198157Srrs        return X86II::MO_GOTPCREL;
76198157Srrs    } else if (!isTargetWin64()) {
77198157Srrs      assert(isTargetELF() && "Unknown rip-relative target");
78198157Srrs
79198157Srrs      // Extra load is needed for all externally visible.
80198157Srrs      if (!GV->hasLocalLinkage() && GV->hasDefaultVisibility())
81198157Srrs        return X86II::MO_GOTPCREL;
82198157Srrs    }
83198157Srrs
84198157Srrs    return X86II::MO_NO_FLAG;
85198157Srrs  }
86198157Srrs
87198157Srrs  if (isPICStyleGOT()) {   // 32-bit ELF targets.
88198157Srrs    // Extra load is needed for all externally visible.
89198157Srrs    if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
90198157Srrs      return X86II::MO_GOTOFF;
91198157Srrs    return X86II::MO_GOT;
92198157Srrs  }
93198157Srrs
94198157Srrs  if (isPICStyleStubPIC()) {  // Darwin/32 in PIC mode.
95198157Srrs    // Determine whether we have a stub reference and/or whether the reference
96198157Srrs    // is relative to the PIC base or not.
97198157Srrs
98198157Srrs    // If this is a strong reference to a definition, it is definitely not
99198157Srrs    // through a stub.
100198157Srrs    if (!isDecl && !GV->isWeakForLinker())
101198157Srrs      return X86II::MO_PIC_BASE_OFFSET;
102198157Srrs
103198157Srrs    // Unless we have a symbol with hidden visibility, we have to go through a
104198157Srrs    // normal $non_lazy_ptr stub because this symbol might be resolved late.
105198157Srrs    if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
106198157Srrs      return X86II::MO_DARWIN_NONLAZY_PIC_BASE;
107198157Srrs
108198157Srrs    // If symbol visibility is hidden, we have a stub for common symbol
109198157Srrs    // references and external declarations.
110198157Srrs    if (isDecl || GV->hasCommonLinkage()) {
111198157Srrs      // Hidden $non_lazy_ptr reference.
112198157Srrs      return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
113198157Srrs    }
114198157Srrs
115198157Srrs    // Otherwise, no stub.
116198157Srrs    return X86II::MO_PIC_BASE_OFFSET;
117198157Srrs  }
118198157Srrs
119198157Srrs  if (isPICStyleStubNoDynamic()) {  // Darwin/32 in -mdynamic-no-pic mode.
120198157Srrs    // Determine whether we have a stub reference.
121198157Srrs
122198157Srrs    // If this is a strong reference to a definition, it is definitely not
123198157Srrs    // through a stub.
124198157Srrs    if (!isDecl && !GV->isWeakForLinker())
125198157Srrs      return X86II::MO_NO_FLAG;
126198157Srrs
127198157Srrs    // Unless we have a symbol with hidden visibility, we have to go through a
128198157Srrs    // normal $non_lazy_ptr stub because this symbol might be resolved late.
129198157Srrs    if (!GV->hasHiddenVisibility())  // Non-hidden $non_lazy_ptr reference.
130198157Srrs      return X86II::MO_DARWIN_NONLAZY;
131198157Srrs
132198157Srrs    // Otherwise, no stub.
133198157Srrs    return X86II::MO_NO_FLAG;
134198157Srrs  }
135198157Srrs
136198157Srrs  // Direct static reference to global.
137198157Srrs  return X86II::MO_NO_FLAG;
138198157Srrs}
139198157Srrs
140198157Srrs
141198157Srrs/// getBZeroEntry - This function returns the name of a function which has an
142198157Srrs/// interface like the non-standard bzero function, if such a function exists on
143198157Srrs/// the current subtarget and it is considered prefereable over memset with zero
144198157Srrs/// passed as the second argument. Otherwise it returns null.
145198157Srrsconst char *X86Subtarget::getBZeroEntry() const {
146198157Srrs  // Darwin 10 has a __bzero entry point for this purpose.
147198157Srrs  if (getDarwinVers() >= 10)
148198157Srrs    return "__bzero";
149198157Srrs
150198157Srrs  return 0;
151198157Srrs}
152198157Srrs
153198157Srrs/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
154198157Srrs/// to immediate address.
155198157Srrsbool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
156198157Srrs  if (Is64Bit)
157198157Srrs    return false;
158198157Srrs  return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
159198157Srrs}
160198157Srrs
161198157Srrs/// getSpecialAddressLatency - For targets where it is beneficial to
162198157Srrs/// backschedule instructions that compute addresses, return a value
163198157Srrs/// indicating the number of scheduling cycles of backscheduling that
164198157Srrs/// should be attempted.
165198157Srrsunsigned X86Subtarget::getSpecialAddressLatency() const {
166198157Srrs  // For x86 out-of-order targets, back-schedule address computations so
167198157Srrs  // that loads and stores aren't blocked.
168198157Srrs  // This value was chosen arbitrarily.
169198157Srrs  return 200;
170198157Srrs}
171198157Srrs
/// GetCpuIDAndInfo - Execute cpuid with \p value in EAX and store the
/// resulting EAX, EBX, ECX and EDX through \p rEAX, \p rEBX, \p rECX and
/// \p rEDX.
///
/// \returns false when cpuid was executed, true when this file has no cpuid
///          implementation for the host architecture/compiler combination (in
///          which case none of the outputs is written).
static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
                            unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
#if (defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)) && \
    defined(__GNUC__)
  // 64-bit host, GCC-style inline asm.
  // gcc doesn't know cpuid would clobber ebx/rbx.  Preserve it manually: park
  // rbx in rsi, run cpuid, then swap so rsi holds cpuid's EBX result and rbx
  // gets its old value back.
  asm ("movq\t%%rbx, %%rsi\n\t"
       "cpuid\n\t"
       "xchgq\t%%rbx, %%rsi\n\t"
       : "=a" (*rEAX),
         "=S" (*rEBX),
         "=c" (*rECX),
         "=d" (*rEDX)
       :  "a" (value));
  return false;
#elif (defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)) && \
      defined(_MSC_VER)
  // 64-bit host, MSVC: use the __cpuid intrinsic.
  int CpuInfo[4];
  __cpuid(CpuInfo, value);
  *rEAX = CpuInfo[0];
  *rEBX = CpuInfo[1];
  *rECX = CpuInfo[2];
  *rEDX = CpuInfo[3];
  return false;
#elif (defined(i386) || defined(__i386__) || defined(__x86__) || \
       defined(_M_IX86)) && defined(__GNUC__)
  // 32-bit host, GCC-style inline asm.  Same ebx save/restore dance as the
  // 64-bit variant, using esi as the scratch register.
  asm ("movl\t%%ebx, %%esi\n\t"
       "cpuid\n\t"
       "xchgl\t%%ebx, %%esi\n\t"
       : "=a" (*rEAX),
         "=S" (*rEBX),
         "=c" (*rECX),
         "=d" (*rEDX)
       :  "a" (value));
  return false;
#elif (defined(i386) || defined(__i386__) || defined(__x86__) || \
       defined(_M_IX86)) && defined(_MSC_VER)
  // 32-bit host, MSVC inline assembler: run cpuid, then store each result
  // register through the matching out-pointer (esi is the scratch pointer).
  __asm {
    mov   eax,value
    cpuid
    mov   esi,rEAX
    mov   dword ptr [esi],eax
    mov   esi,rEBX
    mov   dword ptr [esi],ebx
    mov   esi,rECX
    mov   dword ptr [esi],ecx
    mov   esi,rEDX
    mov   dword ptr [esi],edx
  }
  return false;
#else
  // No way to run cpuid on this host.
  return true;
#endif
}
226198157Srrs
/// DetectFamilyModel - Decode the processor family and model from the EAX
/// value returned by cpuid leaf 1.
///
/// \param EAX     the raw signature.
/// \param Family  receives bits 8-11, plus the extended family ID (bits 20-27)
///                when bits 8-11 are 0xF.
/// \param Model   receives bits 4-7, plus the extended model ID (bits 16-19)
///                shifted left by 4 when bits 8-11 are 6 or 0xF.
static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
  const unsigned BaseModel  = (EAX >> 4)  & 0xf;   // Bits 4 - 7
  const unsigned BaseFamily = (EAX >> 8)  & 0xf;   // Bits 8 - 11
  const unsigned ExtModel   = (EAX >> 16) & 0xf;   // Bits 16 - 19
  const unsigned ExtFamily  = (EAX >> 20) & 0xff;  // Bits 20 - 27

  unsigned DecodedFamily = BaseFamily;
  unsigned DecodedModel  = BaseModel;

  // The extended family ID only applies when the base family is F.
  const bool UsesExtFamily = BaseFamily == 0xf;
  if (UsesExtFamily)
    DecodedFamily += ExtFamily;

  // The extended model ID applies when the base family is 6 or F.
  if (UsesExtFamily || BaseFamily == 6)
    DecodedModel += ExtModel << 4;

  Family = DecodedFamily;
  Model  = DecodedModel;
}
238198157Srrs
239198157Srrsvoid X86Subtarget::AutoDetectSubtargetFeatures() {
240198157Srrs  unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
241198157Srrs  union {
242198157Srrs    unsigned u[3];
243198157Srrs    char     c[12];
244198157Srrs  } text;
245198157Srrs
246198157Srrs  if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
247198157Srrs    return;
248198157Srrs
249198157Srrs  GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
250198157Srrs
251198157Srrs  if ((EDX >> 15) & 1) HasCMov = true;
252198157Srrs  if ((EDX >> 23) & 1) X86SSELevel = MMX;
253198157Srrs  if ((EDX >> 25) & 1) X86SSELevel = SSE1;
254198157Srrs  if ((EDX >> 26) & 1) X86SSELevel = SSE2;
255198157Srrs  if (ECX & 0x1)       X86SSELevel = SSE3;
256198157Srrs  if ((ECX >> 9)  & 1) X86SSELevel = SSSE3;
257198157Srrs  if ((ECX >> 19) & 1) X86SSELevel = SSE41;
258198157Srrs  if ((ECX >> 20) & 1) X86SSELevel = SSE42;
259198157Srrs
260198157Srrs  bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
261198157Srrs  bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
262198157Srrs
263198157Srrs  HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
264198157Srrs  HasFMA3  = IsIntel && ((ECX >> 12) & 0x1);
265198157Srrs  HasAVX   = ((ECX >> 28) & 0x1);
266198157Srrs  HasAES   = IsIntel && ((ECX >> 25) & 0x1);
267198157Srrs
268198157Srrs  if (IsIntel || IsAMD) {
269198157Srrs    // Determine if bit test memory instructions are slow.
270198157Srrs    unsigned Family = 0;
271198157Srrs    unsigned Model  = 0;
272198157Srrs    DetectFamilyModel(EAX, Family, Model);
273198157Srrs    IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
274198157Srrs    // If it's Nehalem, unaligned memory access is fast.
275198157Srrs    if (Family == 15 && Model == 26)
276198157Srrs      IsUAMemFast = true;
277198157Srrs
278198157Srrs    GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
279198157Srrs    HasX86_64 = (EDX >> 29) & 0x1;
280198157Srrs    HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
281198157Srrs    HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
282198157Srrs  }
283198157Srrs}
284198157Srrs
285198157SrrsX86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
286198157Srrs                           bool is64Bit)
287198157Srrs  : PICStyle(PICStyles::None)
288198157Srrs  , X86SSELevel(NoMMXSSE)
289198157Srrs  , X863DNowLevel(NoThreeDNow)
290198157Srrs  , HasCMov(false)
291198157Srrs  , HasX86_64(false)
292198157Srrs  , HasSSE4A(false)
293198157Srrs  , HasAVX(false)
294198157Srrs  , HasAES(false)
295198157Srrs  , HasCLMUL(false)
296198157Srrs  , HasFMA3(false)
297198157Srrs  , HasFMA4(false)
298198157Srrs  , IsBTMemSlow(false)
299198157Srrs  , IsUAMemFast(false)
300198157Srrs  , HasVectorUAMem(false)
301198157Srrs  , stackAlignment(8)
302198157Srrs  // FIXME: this is a known good value for Yonah. How about others?
303198157Srrs  , MaxInlineSizeThreshold(128)
304198157Srrs  , TargetTriple(TT)
305198157Srrs  , Is64Bit(is64Bit) {
306198157Srrs
307198157Srrs  // default to hard float ABI
308198157Srrs  if (FloatABIType == FloatABI::Default)
309198157Srrs    FloatABIType = FloatABI::Hard;
310198157Srrs
311198157Srrs  // Determine default and user specified characteristics
312198157Srrs  if (!FS.empty()) {
313198157Srrs    // If feature string is not empty, parse features string.
314198157Srrs    std::string CPU = sys::getHostCPUName();
315198157Srrs    ParseSubtargetFeatures(FS, CPU);
316198157Srrs    // All X86-64 CPUs also have SSE2, however user might request no SSE via
317198157Srrs    // -mattr, so don't force SSELevel here.
318198157Srrs  } else {
319198157Srrs    // Otherwise, use CPUID to auto-detect feature set.
320198157Srrs    AutoDetectSubtargetFeatures();
321198157Srrs    // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
322198157Srrs    if (Is64Bit && X86SSELevel < SSE2)
323198157Srrs      X86SSELevel = SSE2;
324198157Srrs  }
325198157Srrs
326198157Srrs  // If requesting codegen for X86-64, make sure that 64-bit features
327198157Srrs  // are enabled.
328198157Srrs  if (Is64Bit) {
329198157Srrs    HasX86_64 = true;
330198157Srrs
331198157Srrs    // All 64-bit cpus have cmov support.
332198157Srrs    HasCMov = true;
333198157Srrs  }
334198157Srrs
335198157Srrs  DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
336198157Srrs               << ", 3DNowLevel " << X863DNowLevel
337198157Srrs               << ", 64bit " << HasX86_64 << "\n");
338198157Srrs  assert((!Is64Bit || HasX86_64) &&
339198157Srrs         "64-bit code requested on a subtarget that doesn't support it!");
340198157Srrs
341198157Srrs  // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
342198157Srrs  // bit targets.
343198157Srrs  if (isTargetDarwin() || Is64Bit)
344198157Srrs    stackAlignment = 16;
345198157Srrs
346198157Srrs  if (StackAlignment)
347198157Srrs    stackAlignment = StackAlignment;
348198157Srrs}
349198157Srrs
350198157Srrs/// IsCalleePop - Determines whether the callee is required to pop its
351198157Srrs/// own arguments. Callee pop is necessary to support tail calls.
352198157Srrsbool X86Subtarget::IsCalleePop(bool IsVarArg,
353198157Srrs                               CallingConv::ID CallingConv) const {
354198157Srrs  if (IsVarArg)
355198157Srrs    return false;
356198157Srrs
357198157Srrs  switch (CallingConv) {
358198157Srrs  default:
359198157Srrs    return false;
360198157Srrs  case CallingConv::X86_StdCall:
361198157Srrs    return !is64Bit();
362198157Srrs  case CallingConv::X86_FastCall:
363198157Srrs    return !is64Bit();
364198157Srrs  case CallingConv::X86_ThisCall:
365198157Srrs    return !is64Bit();
366198157Srrs  case CallingConv::Fast:
367198157Srrs    return GuaranteedTailCallOpt;
368198157Srrs  case CallingConv::GHC:
369198157Srrs    return GuaranteedTailCallOpt;
370198157Srrs  }
371198157Srrs}
372198157Srrs