1//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Windows-specific.
10// A parser for the module-definition file (.def file).
11//
// The format of module-definition files is described in this document:
13// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/Object/COFFModuleDefinition.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
20#include "llvm/Object/COFF.h"
21#include "llvm/Object/COFFImportFile.h"
22#include "llvm/Object/Error.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/Path.h"
25#include "llvm/Support/raw_ostream.h"
26
27using namespace llvm::COFF;
28using namespace llvm;
29
30namespace llvm {
31namespace object {
32
/// Categories of token that the Lexer hands to the Parser.
enum Kind {
  // Bookkeeping kinds.
  Unknown = 0, // Kind of a default-constructed Token.
  Eof,         // No more input.

  // Free-form text and punctuation.
  Identifier, // A quoted string or any word that is not a keyword.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Reserved words, each spelled in upper case in the .def file.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
52
53struct Token {
54  explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55  Kind K;
56  StringRef Value;
57};
58
59static bool isDecorated(StringRef Sym, bool MingwDef) {
60  // In def files, the symbols can either be listed decorated or undecorated.
61  //
62  // - For cdecl symbols, only the undecorated form is allowed.
63  // - For fastcall and vectorcall symbols, both fully decorated or
64  //   undecorated forms can be present.
65  // - For stdcall symbols in non-MinGW environments, the decorated form is
66  //   fully decorated with leading underscore and trailing stack argument
67  //   size - like "_Func@0".
68  // - In MinGW def files, a decorated stdcall symbol does not include the
69  //   leading underscore though, like "Func@0".
70
71  // This function controls whether a leading underscore should be added to
72  // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73  // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74  // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75  // as decorated, i.e. don't add any more leading underscores.
76  // We can't check for a leading underscore here, since function names
77  // themselves can start with an underscore, while a second one still needs
78  // to be added.
79  return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80         (!MingwDef && Sym.contains('@'));
81}
82
83static Error createError(const Twine &Err) {
84  return make_error<StringError>(StringRef(Err.str()),
85                                 object_error::parse_failed);
86}
87
88class Lexer {
89public:
90  Lexer(StringRef S) : Buf(S) {}
91
92  Token lex() {
93    Buf = Buf.trim();
94    if (Buf.empty())
95      return Token(Eof);
96
97    switch (Buf[0]) {
98    case '\0':
99      return Token(Eof);
100    case ';': {
101      size_t End = Buf.find('\n');
102      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103      return lex();
104    }
105    case '=':
106      Buf = Buf.drop_front();
107      if (Buf.startswith("=")) {
108        Buf = Buf.drop_front();
109        return Token(EqualEqual, "==");
110      }
111      return Token(Equal, "=");
112    case ',':
113      Buf = Buf.drop_front();
114      return Token(Comma, ",");
115    case '"': {
116      StringRef S;
117      std::tie(S, Buf) = Buf.substr(1).split('"');
118      return Token(Identifier, S);
119    }
120    default: {
121      size_t End = Buf.find_first_of("=,;\r\n \t\v");
122      StringRef Word = Buf.substr(0, End);
123      Kind K = llvm::StringSwitch<Kind>(Word)
124                   .Case("BASE", KwBase)
125                   .Case("CONSTANT", KwConstant)
126                   .Case("DATA", KwData)
127                   .Case("EXPORTS", KwExports)
128                   .Case("HEAPSIZE", KwHeapsize)
129                   .Case("LIBRARY", KwLibrary)
130                   .Case("NAME", KwName)
131                   .Case("NONAME", KwNoname)
132                   .Case("PRIVATE", KwPrivate)
133                   .Case("STACKSIZE", KwStacksize)
134                   .Case("VERSION", KwVersion)
135                   .Default(Identifier);
136      Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
137      return Token(K, Word);
138    }
139    }
140  }
141
142private:
143  StringRef Buf;
144};
145
146class Parser {
147public:
148  explicit Parser(StringRef S, MachineTypes M, bool B)
149      : Lex(S), Machine(M), MingwDef(B) {}
150
151  Expected<COFFModuleDefinition> parse() {
152    do {
153      if (Error Err = parseOne())
154        return std::move(Err);
155    } while (Tok.K != Eof);
156    return Info;
157  }
158
159private:
160  void read() {
161    if (Stack.empty()) {
162      Tok = Lex.lex();
163      return;
164    }
165    Tok = Stack.back();
166    Stack.pop_back();
167  }
168
169  Error readAsInt(uint64_t *I) {
170    read();
171    if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
172      return createError("integer expected");
173    return Error::success();
174  }
175
176  Error expect(Kind Expected, StringRef Msg) {
177    read();
178    if (Tok.K != Expected)
179      return createError(Msg);
180    return Error::success();
181  }
182
183  void unget() { Stack.push_back(Tok); }
184
185  Error parseOne() {
186    read();
187    switch (Tok.K) {
188    case Eof:
189      return Error::success();
190    case KwExports:
191      for (;;) {
192        read();
193        if (Tok.K != Identifier) {
194          unget();
195          return Error::success();
196        }
197        if (Error Err = parseExport())
198          return Err;
199      }
200    case KwHeapsize:
201      return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
202    case KwStacksize:
203      return parseNumbers(&Info.StackReserve, &Info.StackCommit);
204    case KwLibrary:
205    case KwName: {
206      bool IsDll = Tok.K == KwLibrary; // Check before parseName.
207      std::string Name;
208      if (Error Err = parseName(&Name, &Info.ImageBase))
209        return Err;
210
211      Info.ImportName = Name;
212
213      // Set the output file, but don't override /out if it was already passed.
214      if (Info.OutputFile.empty()) {
215        Info.OutputFile = Name;
216        // Append the appropriate file extension if not already present.
217        if (!sys::path::has_extension(Name))
218          Info.OutputFile += IsDll ? ".dll" : ".exe";
219      }
220
221      return Error::success();
222    }
223    case KwVersion:
224      return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
225    default:
226      return createError("unknown directive: " + Tok.Value);
227    }
228  }
229
230  Error parseExport() {
231    COFFShortExport E;
232    E.Name = Tok.Value;
233    read();
234    if (Tok.K == Equal) {
235      read();
236      if (Tok.K != Identifier)
237        return createError("identifier expected, but got " + Tok.Value);
238      E.ExtName = E.Name;
239      E.Name = Tok.Value;
240    } else {
241      unget();
242    }
243
244    if (Machine == IMAGE_FILE_MACHINE_I386) {
245      if (!isDecorated(E.Name, MingwDef))
246        E.Name = (std::string("_").append(E.Name));
247      if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
248        E.ExtName = (std::string("_").append(E.ExtName));
249    }
250
251    for (;;) {
252      read();
253      if (Tok.K == Identifier && Tok.Value[0] == '@') {
254        if (Tok.Value == "@") {
255          // "foo @ 10"
256          read();
257          Tok.Value.getAsInteger(10, E.Ordinal);
258        } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
259          // "foo \n @bar" - Not an ordinal modifier at all, but the next
260          // export (fastcall decorated) - complete the current one.
261          unget();
262          Info.Exports.push_back(E);
263          return Error::success();
264        }
265        // "foo @10"
266        read();
267        if (Tok.K == KwNoname) {
268          E.Noname = true;
269        } else {
270          unget();
271        }
272        continue;
273      }
274      if (Tok.K == KwData) {
275        E.Data = true;
276        continue;
277      }
278      if (Tok.K == KwConstant) {
279        E.Constant = true;
280        continue;
281      }
282      if (Tok.K == KwPrivate) {
283        E.Private = true;
284        continue;
285      }
286      if (Tok.K == EqualEqual) {
287        read();
288        E.AliasTarget = Tok.Value;
289        if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
290          E.AliasTarget = std::string("_").append(E.AliasTarget);
291        continue;
292      }
293      unget();
294      Info.Exports.push_back(E);
295      return Error::success();
296    }
297  }
298
299  // HEAPSIZE/STACKSIZE reserve[,commit]
300  Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
301    if (Error Err = readAsInt(Reserve))
302      return Err;
303    read();
304    if (Tok.K != Comma) {
305      unget();
306      Commit = nullptr;
307      return Error::success();
308    }
309    if (Error Err = readAsInt(Commit))
310      return Err;
311    return Error::success();
312  }
313
314  // NAME outputPath [BASE=address]
315  Error parseName(std::string *Out, uint64_t *Baseaddr) {
316    read();
317    if (Tok.K == Identifier) {
318      *Out = Tok.Value;
319    } else {
320      *Out = "";
321      unget();
322      return Error::success();
323    }
324    read();
325    if (Tok.K == KwBase) {
326      if (Error Err = expect(Equal, "'=' expected"))
327        return Err;
328      if (Error Err = readAsInt(Baseaddr))
329        return Err;
330    } else {
331      unget();
332      *Baseaddr = 0;
333    }
334    return Error::success();
335  }
336
337  // VERSION major[.minor]
338  Error parseVersion(uint32_t *Major, uint32_t *Minor) {
339    read();
340    if (Tok.K != Identifier)
341      return createError("identifier expected, but got " + Tok.Value);
342    StringRef V1, V2;
343    std::tie(V1, V2) = Tok.Value.split('.');
344    if (V1.getAsInteger(10, *Major))
345      return createError("integer expected, but got " + Tok.Value);
346    if (V2.empty())
347      *Minor = 0;
348    else if (V2.getAsInteger(10, *Minor))
349      return createError("integer expected, but got " + Tok.Value);
350    return Error::success();
351  }
352
353  Lexer Lex;
354  Token Tok;
355  std::vector<Token> Stack;
356  MachineTypes Machine;
357  COFFModuleDefinition Info;
358  bool MingwDef;
359};
360
361Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
362                                                         MachineTypes Machine,
363                                                         bool MingwDef) {
364  return Parser(MB.getBuffer(), Machine, MingwDef).parse();
365}
366
367} // namespace object
368} // namespace llvm
369