Mangler.cpp revision 243830
//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Unified name mangler for assembly backends.
//
//===----------------------------------------------------------------------===//
13178479Sjb
#include "llvm/Target/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DataLayout.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstring>
using namespace llvm;
24178479Sjb
/// isAcceptableChar - Return true if \p C may appear unescaped in a symbol
/// name.  ASCII letters, digits, '_', '$' and '@' are always accepted; '.' is
/// accepted only when \p AllowPeriod is set, and bytes with the high bit set
/// are accepted only when \p AllowUTF8 is set.
static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
  if (C >= 'a' && C <= 'z')
    return true;
  if (C >= 'A' && C <= 'Z')
    return true;
  if (C >= '0' && C <= '9')
    return true;
  if (C == '_' || C == '$' || C == '@')
    return true;
  if (AllowPeriod && C == '.')
    return true;
  // Non-ASCII bytes (top bit set) are only tolerated when UTF-8 is allowed.
  return AllowUTF8 && (C & 0x80) != 0;
}
35178479Sjb
/// HexDigit - Return the upper-case hexadecimal digit character for \p V.
/// Callers in this file pass a nibble, i.e. a value in the range [0, 15].
static char HexDigit(int V) {
  if (V < 10)
    return static_cast<char>(V + '0');
  return static_cast<char>(V + 'A' - 10);
}
39211554Srpaulo
40211554Srpaulostatic void MangleLetter(SmallVectorImpl<char> &OutName, unsigned char C) {
41211554Srpaulo  OutName.push_back('_');
42178479Sjb  OutName.push_back(HexDigit(C >> 4));
43178479Sjb  OutName.push_back(HexDigit(C & 15));
44178479Sjb  OutName.push_back('_');
45178479Sjb}
46178479Sjb
47178479Sjb/// NameNeedsEscaping - Return true if the identifier \p Str needs quotes
48178479Sjb/// for this assembler.
49178479Sjbstatic bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
50178479Sjb  assert(!Str.empty() && "Cannot create an empty MCSymbol");
51178479Sjb
52178479Sjb  // If the first character is a number and the target does not allow this, we
53178479Sjb  // need quotes.
54178479Sjb  if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9')
55178479Sjb    return true;
56178479Sjb
57178479Sjb  // If any of the characters in the string is an unacceptable character, force
58178479Sjb  // quotes.
59178479Sjb  bool AllowPeriod = MAI.doesAllowPeriodsInName();
60178479Sjb  bool AllowUTF8 = MAI.doesAllowUTF8();
61178479Sjb  for (unsigned i = 0, e = Str.size(); i != e; ++i)
62178479Sjb    if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
63178479Sjb      return true;
64178479Sjb  return false;
65178479Sjb}
66178479Sjb
67178479Sjb/// appendMangledName - Add the specified string in mangled form if it uses
68178479Sjb/// any unusual characters.
69178479Sjbstatic void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
70178479Sjb                              const MCAsmInfo &MAI) {
71178479Sjb  // The first character is not allowed to be a number unless the target
72178479Sjb  // explicitly allows it.
73178479Sjb  if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
74178479Sjb    MangleLetter(OutName, Str[0]);
75178479Sjb    Str = Str.substr(1);
76178479Sjb  }
77178479Sjb
78178479Sjb  bool AllowPeriod = MAI.doesAllowPeriodsInName();
79178479Sjb  bool AllowUTF8 = MAI.doesAllowUTF8();
80178479Sjb  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
81178479Sjb    if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
82178479Sjb      MangleLetter(OutName, Str[i]);
83178479Sjb    else
84178479Sjb      OutName.push_back(Str[i]);
85178479Sjb  }
86178479Sjb}
87178479Sjb
88178479Sjb
89178479Sjb/// appendMangledQuotedName - On systems that support quoted symbols, we still
90178479Sjb/// have to escape some (obscure) characters like " and \n which would break the
91178479Sjb/// assembler's lexing.
92178479Sjbstatic void appendMangledQuotedName(SmallVectorImpl<char> &OutName,
93178479Sjb                                   StringRef Str) {
94178479Sjb  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
95178479Sjb    if (Str[i] == '"' || Str[i] == '\n')
96178479Sjb      MangleLetter(OutName, Str[i]);
97178479Sjb    else
98178479Sjb      OutName.push_back(Str[i]);
99178479Sjb  }
100178479Sjb}
101178479Sjb
102178479Sjb
103178479Sjb/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
104178479Sjb/// and the specified name as the global variable name.  GVName must not be
105178479Sjb/// empty.
106178479Sjbvoid Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
107178479Sjb                                const Twine &GVName, ManglerPrefixTy PrefixTy) {
108178479Sjb  SmallString<256> TmpData;
109178479Sjb  StringRef Name = GVName.toStringRef(TmpData);
110178479Sjb  assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
111178479Sjb
112178479Sjb  const MCAsmInfo &MAI = Context.getAsmInfo();
113178479Sjb
114178479Sjb  // If the global name is not led with \1, add the appropriate prefixes.
115178479Sjb  if (Name[0] == '\1') {
116178479Sjb    Name = Name.substr(1);
117178479Sjb  } else {
118178479Sjb    if (PrefixTy == Mangler::Private) {
119178479Sjb      const char *Prefix = MAI.getPrivateGlobalPrefix();
120178479Sjb      OutName.append(Prefix, Prefix+strlen(Prefix));
121178479Sjb    } else if (PrefixTy == Mangler::LinkerPrivate) {
122178479Sjb      const char *Prefix = MAI.getLinkerPrivateGlobalPrefix();
123178479Sjb      OutName.append(Prefix, Prefix+strlen(Prefix));
124178479Sjb    }
125178479Sjb
126178479Sjb    const char *Prefix = MAI.getGlobalPrefix();
127178479Sjb    if (Prefix[0] == 0)
128178479Sjb      ; // Common noop, no prefix.
129178479Sjb    else if (Prefix[1] == 0)
130178479Sjb      OutName.push_back(Prefix[0]);  // Common, one character prefix.
131178479Sjb    else
132178479Sjb      OutName.append(Prefix, Prefix+strlen(Prefix)); // Arbitrary length prefix.
133178479Sjb  }
134178479Sjb
135178479Sjb  // If this is a simple string that doesn't need escaping, just append it.
136178479Sjb  if (!NameNeedsEscaping(Name, MAI) ||
137178479Sjb      // If quotes are supported, they can be used unless the string contains
138178479Sjb      // a quote or newline.
139178479Sjb      (MAI.doesAllowQuotesInName() &&
140178479Sjb       Name.find_first_of("\n\"") == StringRef::npos)) {
141178479Sjb    OutName.append(Name.begin(), Name.end());
142178479Sjb    return;
143178479Sjb  }
144178479Sjb
145178479Sjb  // On systems that do not allow quoted names, we need to mangle most
146178479Sjb  // strange characters.
147178479Sjb  if (!MAI.doesAllowQuotesInName())
148178479Sjb    return appendMangledName(OutName, Name, MAI);
149178479Sjb
150178479Sjb  // Okay, the system allows quoted strings.  We can quote most anything, the
151178479Sjb  // only characters that need escaping are " and \n.
152178479Sjb  assert(Name.find_first_of("\n\"") != StringRef::npos);
153178479Sjb  return appendMangledQuotedName(OutName, Name);
154178479Sjb}
155178479Sjb
156178479Sjb/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
157249856Spfg/// a suffix on their name indicating the number of words of arguments they
158178550Sjb/// take.
159178479Sjbstatic void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
160178479Sjb                                     const Function *F, const DataLayout &TD) {
161178479Sjb  // Calculate arguments size total.
162178479Sjb  unsigned ArgWords = 0;
163178479Sjb  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
164178479Sjb       AI != AE; ++AI) {
165178479Sjb    Type *Ty = AI->getType();
166178479Sjb    // 'Dereference' type in case of byval parameter attribute
167178479Sjb    if (AI->hasByValAttr())
168178479Sjb      Ty = cast<PointerType>(Ty)->getElementType();
169178479Sjb    // Size should be aligned to DWORD boundary
170178479Sjb    ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
171178479Sjb  }
172178479Sjb
173178479Sjb  raw_svector_ostream(OutName) << '@' << ArgWords;
174178479Sjb}
175178479Sjb
176178479Sjb
177178479Sjb/// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
178178479Sjb/// and the specified global variable's name.  If the global variable doesn't
179178479Sjb/// have a name, this fills in a unique name for the global.
180178479Sjbvoid Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
181178479Sjb                                const GlobalValue *GV,
182178479Sjb                                bool isImplicitlyPrivate) {
183178479Sjb  ManglerPrefixTy PrefixTy = Mangler::Default;
184178479Sjb  if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
185178479Sjb    PrefixTy = Mangler::Private;
186178479Sjb  else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
187178479Sjb    PrefixTy = Mangler::LinkerPrivate;
188178479Sjb
189178479Sjb  // If this global has a name, handle it simply.
190178479Sjb  if (GV->hasName()) {
191178479Sjb    getNameWithPrefix(OutName, GV->getName(), PrefixTy);
192178479Sjb  } else {
193178479Sjb    // Get the ID for the global, assigning a new one if we haven't got one
194178479Sjb    // already.
195178479Sjb    unsigned &ID = AnonGlobalIDs[GV];
196178479Sjb    if (ID == 0) ID = NextAnonGlobalID++;
197178479Sjb
198178479Sjb    // Must mangle the global into a unique ID.
199178479Sjb    getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
200178479Sjb  }
201178479Sjb
202178479Sjb  // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
203178479Sjb  // add it.
204178479Sjb  if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) {
205178479Sjb    if (const Function *F = dyn_cast<Function>(GV)) {
206178479Sjb      CallingConv::ID CC = F->getCallingConv();
207178479Sjb
208178479Sjb      // fastcall functions need to start with @.
209178479Sjb      // FIXME: This logic seems unlikely to be right.
210178479Sjb      if (CC == CallingConv::X86_FastCall) {
211178479Sjb        if (OutName[0] == '_')
212178479Sjb          OutName[0] = '@';
213178479Sjb        else
214178479Sjb          OutName.insert(OutName.begin(), '@');
215178479Sjb      }
216178479Sjb
217178479Sjb      // fastcall and stdcall functions usually need @42 at the end to specify
218178479Sjb      // the argument info.
219178479Sjb      FunctionType *FT = F->getFunctionType();
220178479Sjb      if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
221178479Sjb          // "Pure" variadic functions do not receive @0 suffix.
222178479Sjb          (!FT->isVarArg() || FT->getNumParams() == 0 ||
223178479Sjb           (FT->getNumParams() == 1 && F->hasStructRetAttr())))
224178479Sjb        AddFastCallStdCallSuffix(OutName, F, TD);
225178479Sjb    }
226178479Sjb  }
227178479Sjb}
228178479Sjb
229178479Sjb/// getSymbol - Return the MCSymbol for the specified global value.  This
230178479Sjb/// symbol is the main label that is the address of the global.
231178479SjbMCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
232178479Sjb  SmallString<60> NameStr;
233178479Sjb  getNameWithPrefix(NameStr, GV, false);
234178479Sjb  return Context.GetOrCreateSymbol(NameStr.str());
235178479Sjb}
236178479Sjb
237178479Sjb
238178479Sjb