1318663Sdim//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2318663Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6318663Sdim//
7318663Sdim//===----------------------------------------------------------------------===//
8318663Sdim//
9318663Sdim// Windows-specific.
10318663Sdim// A parser for the module-definition file (.def file).
11318663Sdim//
// The format of module-definition files is described in this document:
13318663Sdim// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14318663Sdim//
15318663Sdim//===----------------------------------------------------------------------===//
16318663Sdim
17318663Sdim#include "llvm/Object/COFFModuleDefinition.h"
18318663Sdim#include "llvm/ADT/StringRef.h"
19318663Sdim#include "llvm/ADT/StringSwitch.h"
20318663Sdim#include "llvm/Object/COFF.h"
21318663Sdim#include "llvm/Object/COFFImportFile.h"
22318663Sdim#include "llvm/Object/Error.h"
23318663Sdim#include "llvm/Support/Error.h"
24321238Sdim#include "llvm/Support/Path.h"
25318663Sdim#include "llvm/Support/raw_ostream.h"
26318663Sdim
27318663Sdimusing namespace llvm::COFF;
28318663Sdimusing namespace llvm;
29318663Sdim
30318663Sdimnamespace llvm {
31318663Sdimnamespace object {
32318663Sdim
/// Kinds of tokens the Lexer can hand to the Parser. Keyword kinds are
/// produced only for an exact (case-sensitive) match of the keyword text.
enum Kind {
  Unknown,     // Kind of a default-constructed Token; never produced by lex().
  Eof,         // End of input (or a NUL character in the buffer).
  Identifier,  // Any non-keyword word, or the contents of a quoted string.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
52318663Sdim
53318663Sdimstruct Token {
54318663Sdim  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55318663Sdim  Kind K;
56318663Sdim  StringRef Value;
57318663Sdim};
58318663Sdim
59321238Sdimstatic bool isDecorated(StringRef Sym, bool MingwDef) {
60327952Sdim  // In def files, the symbols can either be listed decorated or undecorated.
61327952Sdim  //
62327952Sdim  // - For cdecl symbols, only the undecorated form is allowed.
63327952Sdim  // - For fastcall and vectorcall symbols, both fully decorated or
64327952Sdim  //   undecorated forms can be present.
65327952Sdim  // - For stdcall symbols in non-MinGW environments, the decorated form is
66327952Sdim  //   fully decorated with leading underscore and trailing stack argument
67327952Sdim  //   size - like "_Func@0".
68327952Sdim  // - In MinGW def files, a decorated stdcall symbol does not include the
69327952Sdim  //   leading underscore though, like "Func@0".
70327952Sdim
71327952Sdim  // This function controls whether a leading underscore should be added to
72327952Sdim  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73327952Sdim  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74327952Sdim  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75327952Sdim  // as decorated, i.e. don't add any more leading underscores.
76327952Sdim  // We can't check for a leading underscore here, since function names
77327952Sdim  // themselves can start with an underscore, while a second one still needs
78327952Sdim  // to be added.
79327952Sdim  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80327952Sdim         (!MingwDef && Sym.contains('@'));
81318663Sdim}
82318663Sdim
83318663Sdimstatic Error createError(const Twine &Err) {
84318663Sdim  return make_error<StringError>(StringRef(Err.str()),
85318663Sdim                                 object_error::parse_failed);
86318663Sdim}
87318663Sdim
88318663Sdimclass Lexer {
89318663Sdimpublic:
90318663Sdim  Lexer(StringRef S) : Buf(S) {}
91318663Sdim
92318663Sdim  Token lex() {
93318663Sdim    Buf = Buf.trim();
94318663Sdim    if (Buf.empty())
95318663Sdim      return Token(Eof);
96318663Sdim
97318663Sdim    switch (Buf[0]) {
98318663Sdim    case '\0':
99318663Sdim      return Token(Eof);
100318663Sdim    case ';': {
101318663Sdim      size_t End = Buf.find('\n');
102318663Sdim      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103318663Sdim      return lex();
104318663Sdim    }
105318663Sdim    case '=':
106318663Sdim      Buf = Buf.drop_front();
107341825Sdim      if (Buf.startswith("=")) {
108321238Sdim        Buf = Buf.drop_front();
109341825Sdim        return Token(EqualEqual, "==");
110341825Sdim      }
111318663Sdim      return Token(Equal, "=");
112318663Sdim    case ',':
113318663Sdim      Buf = Buf.drop_front();
114318663Sdim      return Token(Comma, ",");
115318663Sdim    case '"': {
116318663Sdim      StringRef S;
117318663Sdim      std::tie(S, Buf) = Buf.substr(1).split('"');
118318663Sdim      return Token(Identifier, S);
119318663Sdim    }
120318663Sdim    default: {
121327952Sdim      size_t End = Buf.find_first_of("=,;\r\n \t\v");
122318663Sdim      StringRef Word = Buf.substr(0, End);
123318663Sdim      Kind K = llvm::StringSwitch<Kind>(Word)
124318663Sdim                   .Case("BASE", KwBase)
125318663Sdim                   .Case("CONSTANT", KwConstant)
126318663Sdim                   .Case("DATA", KwData)
127318663Sdim                   .Case("EXPORTS", KwExports)
128318663Sdim                   .Case("HEAPSIZE", KwHeapsize)
129318663Sdim                   .Case("LIBRARY", KwLibrary)
130318663Sdim                   .Case("NAME", KwName)
131318663Sdim                   .Case("NONAME", KwNoname)
132318663Sdim                   .Case("PRIVATE", KwPrivate)
133318663Sdim                   .Case("STACKSIZE", KwStacksize)
134318663Sdim                   .Case("VERSION", KwVersion)
135318663Sdim                   .Default(Identifier);
136318663Sdim      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
137318663Sdim      return Token(K, Word);
138318663Sdim    }
139318663Sdim    }
140318663Sdim  }
141318663Sdim
142318663Sdimprivate:
143318663Sdim  StringRef Buf;
144318663Sdim};
145318663Sdim
146318663Sdimclass Parser {
147318663Sdimpublic:
148321238Sdim  explicit Parser(StringRef S, MachineTypes M, bool B)
149321238Sdim      : Lex(S), Machine(M), MingwDef(B) {}
150318663Sdim
151318663Sdim  Expected<COFFModuleDefinition> parse() {
152318663Sdim    do {
153318663Sdim      if (Error Err = parseOne())
154318663Sdim        return std::move(Err);
155318663Sdim    } while (Tok.K != Eof);
156318663Sdim    return Info;
157318663Sdim  }
158318663Sdim
159318663Sdimprivate:
160318663Sdim  void read() {
161318663Sdim    if (Stack.empty()) {
162318663Sdim      Tok = Lex.lex();
163318663Sdim      return;
164318663Sdim    }
165318663Sdim    Tok = Stack.back();
166318663Sdim    Stack.pop_back();
167318663Sdim  }
168318663Sdim
169318663Sdim  Error readAsInt(uint64_t *I) {
170318663Sdim    read();
171318663Sdim    if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
172318663Sdim      return createError("integer expected");
173318663Sdim    return Error::success();
174318663Sdim  }
175318663Sdim
176318663Sdim  Error expect(Kind Expected, StringRef Msg) {
177318663Sdim    read();
178318663Sdim    if (Tok.K != Expected)
179318663Sdim      return createError(Msg);
180318663Sdim    return Error::success();
181318663Sdim  }
182318663Sdim
183318663Sdim  void unget() { Stack.push_back(Tok); }
184318663Sdim
185318663Sdim  Error parseOne() {
186318663Sdim    read();
187318663Sdim    switch (Tok.K) {
188318663Sdim    case Eof:
189318663Sdim      return Error::success();
190318663Sdim    case KwExports:
191318663Sdim      for (;;) {
192318663Sdim        read();
193318663Sdim        if (Tok.K != Identifier) {
194318663Sdim          unget();
195318663Sdim          return Error::success();
196318663Sdim        }
197318663Sdim        if (Error Err = parseExport())
198318663Sdim          return Err;
199318663Sdim      }
200318663Sdim    case KwHeapsize:
201318663Sdim      return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
202318663Sdim    case KwStacksize:
203318663Sdim      return parseNumbers(&Info.StackReserve, &Info.StackCommit);
204318663Sdim    case KwLibrary:
205318663Sdim    case KwName: {
206318663Sdim      bool IsDll = Tok.K == KwLibrary; // Check before parseName.
207318663Sdim      std::string Name;
208318663Sdim      if (Error Err = parseName(&Name, &Info.ImageBase))
209318663Sdim        return Err;
210318663Sdim
211321238Sdim      Info.ImportName = Name;
212321238Sdim
213318663Sdim      // Set the output file, but don't override /out if it was already passed.
214321238Sdim      if (Info.OutputFile.empty()) {
215318663Sdim        Info.OutputFile = Name;
216321238Sdim        // Append the appropriate file extension if not already present.
217321238Sdim        if (!sys::path::has_extension(Name))
218321238Sdim          Info.OutputFile += IsDll ? ".dll" : ".exe";
219321238Sdim      }
220321238Sdim
221318663Sdim      return Error::success();
222318663Sdim    }
223318663Sdim    case KwVersion:
224318663Sdim      return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
225318663Sdim    default:
226318663Sdim      return createError("unknown directive: " + Tok.Value);
227318663Sdim    }
228318663Sdim  }
229318663Sdim
230318663Sdim  Error parseExport() {
231318663Sdim    COFFShortExport E;
232318663Sdim    E.Name = Tok.Value;
233318663Sdim    read();
234318663Sdim    if (Tok.K == Equal) {
235318663Sdim      read();
236318663Sdim      if (Tok.K != Identifier)
237318663Sdim        return createError("identifier expected, but got " + Tok.Value);
238318663Sdim      E.ExtName = E.Name;
239318663Sdim      E.Name = Tok.Value;
240318663Sdim    } else {
241318663Sdim      unget();
242318663Sdim    }
243318663Sdim
244318663Sdim    if (Machine == IMAGE_FILE_MACHINE_I386) {
245321238Sdim      if (!isDecorated(E.Name, MingwDef))
246318663Sdim        E.Name = (std::string("_").append(E.Name));
247321238Sdim      if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
248318663Sdim        E.ExtName = (std::string("_").append(E.ExtName));
249318663Sdim    }
250318663Sdim
251318663Sdim    for (;;) {
252318663Sdim      read();
253318663Sdim      if (Tok.K == Identifier && Tok.Value[0] == '@') {
254327952Sdim        if (Tok.Value == "@") {
255327952Sdim          // "foo @ 10"
256327952Sdim          read();
257327952Sdim          Tok.Value.getAsInteger(10, E.Ordinal);
258327952Sdim        } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
259327952Sdim          // "foo \n @bar" - Not an ordinal modifier at all, but the next
260327952Sdim          // export (fastcall decorated) - complete the current one.
261322855Sdim          unget();
262322855Sdim          Info.Exports.push_back(E);
263322855Sdim          return Error::success();
264322855Sdim        }
265327952Sdim        // "foo @10"
266318663Sdim        read();
267318663Sdim        if (Tok.K == KwNoname) {
268318663Sdim          E.Noname = true;
269318663Sdim        } else {
270318663Sdim          unget();
271318663Sdim        }
272318663Sdim        continue;
273318663Sdim      }
274318663Sdim      if (Tok.K == KwData) {
275318663Sdim        E.Data = true;
276318663Sdim        continue;
277318663Sdim      }
278318663Sdim      if (Tok.K == KwConstant) {
279318663Sdim        E.Constant = true;
280318663Sdim        continue;
281318663Sdim      }
282318663Sdim      if (Tok.K == KwPrivate) {
283318663Sdim        E.Private = true;
284318663Sdim        continue;
285318663Sdim      }
286341825Sdim      if (Tok.K == EqualEqual) {
287341825Sdim        read();
288341825Sdim        E.AliasTarget = Tok.Value;
289341825Sdim        if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
290341825Sdim          E.AliasTarget = std::string("_").append(E.AliasTarget);
291341825Sdim        continue;
292341825Sdim      }
293318663Sdim      unget();
294318663Sdim      Info.Exports.push_back(E);
295318663Sdim      return Error::success();
296318663Sdim    }
297318663Sdim  }
298318663Sdim
299318663Sdim  // HEAPSIZE/STACKSIZE reserve[,commit]
300318663Sdim  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
301318663Sdim    if (Error Err = readAsInt(Reserve))
302318663Sdim      return Err;
303318663Sdim    read();
304318663Sdim    if (Tok.K != Comma) {
305318663Sdim      unget();
306318663Sdim      Commit = nullptr;
307318663Sdim      return Error::success();
308318663Sdim    }
309318663Sdim    if (Error Err = readAsInt(Commit))
310318663Sdim      return Err;
311318663Sdim    return Error::success();
312318663Sdim  }
313318663Sdim
314318663Sdim  // NAME outputPath [BASE=address]
315318663Sdim  Error parseName(std::string *Out, uint64_t *Baseaddr) {
316318663Sdim    read();
317318663Sdim    if (Tok.K == Identifier) {
318318663Sdim      *Out = Tok.Value;
319318663Sdim    } else {
320318663Sdim      *Out = "";
321318663Sdim      unget();
322318663Sdim      return Error::success();
323318663Sdim    }
324318663Sdim    read();
325318663Sdim    if (Tok.K == KwBase) {
326318663Sdim      if (Error Err = expect(Equal, "'=' expected"))
327318663Sdim        return Err;
328318663Sdim      if (Error Err = readAsInt(Baseaddr))
329318663Sdim        return Err;
330318663Sdim    } else {
331318663Sdim      unget();
332318663Sdim      *Baseaddr = 0;
333318663Sdim    }
334318663Sdim    return Error::success();
335318663Sdim  }
336318663Sdim
337318663Sdim  // VERSION major[.minor]
338318663Sdim  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
339318663Sdim    read();
340318663Sdim    if (Tok.K != Identifier)
341318663Sdim      return createError("identifier expected, but got " + Tok.Value);
342318663Sdim    StringRef V1, V2;
343318663Sdim    std::tie(V1, V2) = Tok.Value.split('.');
344318663Sdim    if (V1.getAsInteger(10, *Major))
345318663Sdim      return createError("integer expected, but got " + Tok.Value);
346318663Sdim    if (V2.empty())
347318663Sdim      *Minor = 0;
348318663Sdim    else if (V2.getAsInteger(10, *Minor))
349318663Sdim      return createError("integer expected, but got " + Tok.Value);
350318663Sdim    return Error::success();
351318663Sdim  }
352318663Sdim
353318663Sdim  Lexer Lex;
354318663Sdim  Token Tok;
355318663Sdim  std::vector<Token> Stack;
356318663Sdim  MachineTypes Machine;
357318663Sdim  COFFModuleDefinition Info;
358321238Sdim  bool MingwDef;
359318663Sdim};
360318663Sdim
361318663SdimExpected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
362321238Sdim                                                         MachineTypes Machine,
363321238Sdim                                                         bool MingwDef) {
364321238Sdim  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
365318663Sdim}
366318663Sdim
367318663Sdim} // namespace object
368318663Sdim} // namespace llvm
369