1//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// SymbolRewriter is an LLVM pass which can rewrite symbols transparently within
10// existing code.  It is implemented as a compiler pass and is configured via a
11// YAML configuration file.
12//
13// The YAML configuration file format is as follows:
14//
15// RewriteMapFile := RewriteDescriptors
16// RewriteDescriptors := RewriteDescriptor | RewriteDescriptors
17// RewriteDescriptor := RewriteDescriptorType ':' '{' RewriteDescriptorFields '}'
18// RewriteDescriptorFields := RewriteDescriptorField | RewriteDescriptorFields
19// RewriteDescriptorField := FieldIdentifier ':' FieldValue ','
20// RewriteDescriptorType := Identifier
21// FieldIdentifier := Identifier
22// FieldValue := Identifier
23// Identifier := [0-9a-zA-Z]+
24//
25// Currently, the following descriptor types are supported:
26//
27// - function:          (function rewriting)
28//      + Source        (original name of the function)
29//      + Target        (explicit transformation)
30//      + Transform     (pattern transformation)
31//      + Naked         (boolean, whether the function is undecorated)
32// - global variable:   (external linkage global variable rewriting)
33//      + Source        (original name of externally visible variable)
34//      + Target        (explicit transformation)
35//      + Transform     (pattern transformation)
36// - global alias:      (global alias rewriting)
37//      + Source        (original name of the aliased name)
38//      + Target        (explicit transformation)
39//      + Transform     (pattern transformation)
40//
// Note that Source and exactly one of [Target, Transform] must be provided.
42//
// New rewrite descriptors can be created.  Adding a new rewrite descriptor
// involves:
//
//  a) extending the rewrite descriptor kind enumeration
47//     (<anonymous>::RewriteDescriptor::RewriteDescriptorType)
48//  b) implementing the new descriptor
49//     (c.f. <anonymous>::ExplicitRewriteFunctionDescriptor)
50//  c) extending the rewrite map parser
51//     (<anonymous>::RewriteMapParser::parseEntry)
52//
53//  Specify to rewrite the symbols using the `-rewrite-symbols` option, and
54//  specify the map file to use for the rewriting via the `-rewrite-map-file`
55//  option.
56//
57//===----------------------------------------------------------------------===//
58
59#include "llvm/Transforms/Utils/SymbolRewriter.h"
60#include "llvm/ADT/SmallString.h"
61#include "llvm/ADT/StringRef.h"
62#include "llvm/ADT/ilist.h"
63#include "llvm/ADT/iterator_range.h"
64#include "llvm/IR/Comdat.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/GlobalAlias.h"
67#include "llvm/IR/GlobalObject.h"
68#include "llvm/IR/GlobalVariable.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Value.h"
71#include "llvm/Support/Casting.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/ErrorHandling.h"
74#include "llvm/Support/ErrorOr.h"
75#include "llvm/Support/MemoryBuffer.h"
76#include "llvm/Support/Regex.h"
77#include "llvm/Support/SourceMgr.h"
78#include "llvm/Support/YAMLParser.h"
79#include <memory>
80#include <string>
81#include <vector>
82
83using namespace llvm;
84using namespace SymbolRewriter;
85
86#define DEBUG_TYPE "symbol-rewriter"
87
88static cl::list<std::string> RewriteMapFiles("rewrite-map-file",
89                                             cl::desc("Symbol Rewrite Map"),
90                                             cl::value_desc("filename"),
91                                             cl::Hidden);
92
93static void rewriteComdat(Module &M, GlobalObject *GO,
94                          const std::string &Source,
95                          const std::string &Target) {
96  if (Comdat *CD = GO->getComdat()) {
97    auto &Comdats = M.getComdatSymbolTable();
98
99    Comdat *C = M.getOrInsertComdat(Target);
100    C->setSelectionKind(CD->getSelectionKind());
101    GO->setComdat(C);
102
103    Comdats.erase(Comdats.find(Source));
104  }
105}
106
107namespace {
108
109template <RewriteDescriptor::Type DT, typename ValueType,
110          ValueType *(Module::*Get)(StringRef) const>
111class ExplicitRewriteDescriptor : public RewriteDescriptor {
112public:
113  const std::string Source;
114  const std::string Target;
115
116  ExplicitRewriteDescriptor(StringRef S, StringRef T, const bool Naked)
117      : RewriteDescriptor(DT),
118        Source(std::string(Naked ? StringRef("\01" + S.str()) : S)),
119        Target(std::string(T)) {}
120
121  bool performOnModule(Module &M) override;
122
123  static bool classof(const RewriteDescriptor *RD) {
124    return RD->getType() == DT;
125  }
126};
127
128} // end anonymous namespace
129
130template <RewriteDescriptor::Type DT, typename ValueType,
131          ValueType *(Module::*Get)(StringRef) const>
132bool ExplicitRewriteDescriptor<DT, ValueType, Get>::performOnModule(Module &M) {
133  bool Changed = false;
134  if (ValueType *S = (M.*Get)(Source)) {
135    if (GlobalObject *GO = dyn_cast<GlobalObject>(S))
136      rewriteComdat(M, GO, Source, Target);
137
138    if (Value *T = (M.*Get)(Target))
139      S->setValueName(T->getValueName());
140    else
141      S->setName(Target);
142
143    Changed = true;
144  }
145  return Changed;
146}
147
148namespace {
149
150template <RewriteDescriptor::Type DT, typename ValueType,
151          ValueType *(Module::*Get)(StringRef) const,
152          iterator_range<typename iplist<ValueType>::iterator>
153          (Module::*Iterator)()>
154class PatternRewriteDescriptor : public RewriteDescriptor {
155public:
156  const std::string Pattern;
157  const std::string Transform;
158
159  PatternRewriteDescriptor(StringRef P, StringRef T)
160      : RewriteDescriptor(DT), Pattern(std::string(P)),
161        Transform(std::string(T)) {}
162
163  bool performOnModule(Module &M) override;
164
165  static bool classof(const RewriteDescriptor *RD) {
166    return RD->getType() == DT;
167  }
168};
169
170} // end anonymous namespace
171
172template <RewriteDescriptor::Type DT, typename ValueType,
173          ValueType *(Module::*Get)(StringRef) const,
174          iterator_range<typename iplist<ValueType>::iterator>
175          (Module::*Iterator)()>
176bool PatternRewriteDescriptor<DT, ValueType, Get, Iterator>::
177performOnModule(Module &M) {
178  bool Changed = false;
179  for (auto &C : (M.*Iterator)()) {
180    std::string Error;
181
182    std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
183    if (!Error.empty())
184      report_fatal_error(Twine("unable to transforn ") + C.getName() + " in " +
185                         M.getModuleIdentifier() + ": " + Error);
186
187    if (C.getName() == Name)
188      continue;
189
190    if (GlobalObject *GO = dyn_cast<GlobalObject>(&C))
191      rewriteComdat(M, GO, std::string(C.getName()), Name);
192
193    if (Value *V = (M.*Get)(Name))
194      C.setValueName(V->getValueName());
195    else
196      C.setName(Name);
197
198    Changed = true;
199  }
200  return Changed;
201}
202
203namespace {
204
205/// Represents a rewrite for an explicitly named (function) symbol.  Both the
206/// source function name and target function name of the transformation are
207/// explicitly spelt out.
208using ExplicitRewriteFunctionDescriptor =
209    ExplicitRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
210                              &Module::getFunction>;
211
212/// Represents a rewrite for an explicitly named (global variable) symbol.  Both
213/// the source variable name and target variable name are spelt out.  This
214/// applies only to module level variables.
215using ExplicitRewriteGlobalVariableDescriptor =
216    ExplicitRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
217                              GlobalVariable, &Module::getGlobalVariable>;
218
219/// Represents a rewrite for an explicitly named global alias.  Both the source
220/// and target name are explicitly spelt out.
221using ExplicitRewriteNamedAliasDescriptor =
222    ExplicitRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
223                              &Module::getNamedAlias>;
224
225/// Represents a rewrite for a regular expression based pattern for functions.
226/// A pattern for the function name is provided and a transformation for that
227/// pattern to determine the target function name create the rewrite rule.
228using PatternRewriteFunctionDescriptor =
229    PatternRewriteDescriptor<RewriteDescriptor::Type::Function, Function,
230                             &Module::getFunction, &Module::functions>;
231
232/// Represents a rewrite for a global variable based upon a matching pattern.
233/// Each global variable matching the provided pattern will be transformed as
234/// described in the transformation pattern for the target.  Applies only to
235/// module level variables.
236using PatternRewriteGlobalVariableDescriptor =
237    PatternRewriteDescriptor<RewriteDescriptor::Type::GlobalVariable,
238                             GlobalVariable, &Module::getGlobalVariable,
239                             &Module::globals>;
240
241/// PatternRewriteNamedAliasDescriptor - represents a rewrite for global
242/// aliases which match a given pattern.  The provided transformation will be
243/// applied to each of the matching names.
244using PatternRewriteNamedAliasDescriptor =
245    PatternRewriteDescriptor<RewriteDescriptor::Type::NamedAlias, GlobalAlias,
246                             &Module::getNamedAlias, &Module::aliases>;
247
248} // end anonymous namespace
249
250bool RewriteMapParser::parse(const std::string &MapFile,
251                             RewriteDescriptorList *DL) {
252  ErrorOr<std::unique_ptr<MemoryBuffer>> Mapping =
253      MemoryBuffer::getFile(MapFile);
254
255  if (!Mapping)
256    report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
257                       "': " + Mapping.getError().message());
258
259  if (!parse(*Mapping, DL))
260    report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
261
262  return true;
263}
264
265bool RewriteMapParser::parse(std::unique_ptr<MemoryBuffer> &MapFile,
266                             RewriteDescriptorList *DL) {
267  SourceMgr SM;
268  yaml::Stream YS(MapFile->getBuffer(), SM);
269
270  for (auto &Document : YS) {
271    yaml::MappingNode *DescriptorList;
272
273    // ignore empty documents
274    if (isa<yaml::NullNode>(Document.getRoot()))
275      continue;
276
277    DescriptorList = dyn_cast<yaml::MappingNode>(Document.getRoot());
278    if (!DescriptorList) {
279      YS.printError(Document.getRoot(), "DescriptorList node must be a map");
280      return false;
281    }
282
283    for (auto &Descriptor : *DescriptorList)
284      if (!parseEntry(YS, Descriptor, DL))
285        return false;
286  }
287
288  return true;
289}
290
291bool RewriteMapParser::parseEntry(yaml::Stream &YS, yaml::KeyValueNode &Entry,
292                                  RewriteDescriptorList *DL) {
293  yaml::ScalarNode *Key;
294  yaml::MappingNode *Value;
295  SmallString<32> KeyStorage;
296  StringRef RewriteType;
297
298  Key = dyn_cast<yaml::ScalarNode>(Entry.getKey());
299  if (!Key) {
300    YS.printError(Entry.getKey(), "rewrite type must be a scalar");
301    return false;
302  }
303
304  Value = dyn_cast<yaml::MappingNode>(Entry.getValue());
305  if (!Value) {
306    YS.printError(Entry.getValue(), "rewrite descriptor must be a map");
307    return false;
308  }
309
310  RewriteType = Key->getValue(KeyStorage);
311  if (RewriteType.equals("function"))
312    return parseRewriteFunctionDescriptor(YS, Key, Value, DL);
313  else if (RewriteType.equals("global variable"))
314    return parseRewriteGlobalVariableDescriptor(YS, Key, Value, DL);
315  else if (RewriteType.equals("global alias"))
316    return parseRewriteGlobalAliasDescriptor(YS, Key, Value, DL);
317
318  YS.printError(Entry.getKey(), "unknown rewrite type");
319  return false;
320}
321
322bool RewriteMapParser::
323parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
324                               yaml::MappingNode *Descriptor,
325                               RewriteDescriptorList *DL) {
326  bool Naked = false;
327  std::string Source;
328  std::string Target;
329  std::string Transform;
330
331  for (auto &Field : *Descriptor) {
332    yaml::ScalarNode *Key;
333    yaml::ScalarNode *Value;
334    SmallString<32> KeyStorage;
335    SmallString<32> ValueStorage;
336    StringRef KeyValue;
337
338    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
339    if (!Key) {
340      YS.printError(Field.getKey(), "descriptor key must be a scalar");
341      return false;
342    }
343
344    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
345    if (!Value) {
346      YS.printError(Field.getValue(), "descriptor value must be a scalar");
347      return false;
348    }
349
350    KeyValue = Key->getValue(KeyStorage);
351    if (KeyValue.equals("source")) {
352      std::string Error;
353
354      Source = std::string(Value->getValue(ValueStorage));
355      if (!Regex(Source).isValid(Error)) {
356        YS.printError(Field.getKey(), "invalid regex: " + Error);
357        return false;
358      }
359    } else if (KeyValue.equals("target")) {
360      Target = std::string(Value->getValue(ValueStorage));
361    } else if (KeyValue.equals("transform")) {
362      Transform = std::string(Value->getValue(ValueStorage));
363    } else if (KeyValue.equals("naked")) {
364      std::string Undecorated;
365
366      Undecorated = std::string(Value->getValue(ValueStorage));
367      Naked = StringRef(Undecorated).lower() == "true" || Undecorated == "1";
368    } else {
369      YS.printError(Field.getKey(), "unknown key for function");
370      return false;
371    }
372  }
373
374  if (Transform.empty() == Target.empty()) {
375    YS.printError(Descriptor,
376                  "exactly one of transform or target must be specified");
377    return false;
378  }
379
380  // TODO see if there is a more elegant solution to selecting the rewrite
381  // descriptor type
382  if (!Target.empty())
383    DL->push_back(std::make_unique<ExplicitRewriteFunctionDescriptor>(
384        Source, Target, Naked));
385  else
386    DL->push_back(
387        std::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
388
389  return true;
390}
391
392bool RewriteMapParser::
393parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
394                                     yaml::MappingNode *Descriptor,
395                                     RewriteDescriptorList *DL) {
396  std::string Source;
397  std::string Target;
398  std::string Transform;
399
400  for (auto &Field : *Descriptor) {
401    yaml::ScalarNode *Key;
402    yaml::ScalarNode *Value;
403    SmallString<32> KeyStorage;
404    SmallString<32> ValueStorage;
405    StringRef KeyValue;
406
407    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
408    if (!Key) {
409      YS.printError(Field.getKey(), "descriptor Key must be a scalar");
410      return false;
411    }
412
413    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
414    if (!Value) {
415      YS.printError(Field.getValue(), "descriptor value must be a scalar");
416      return false;
417    }
418
419    KeyValue = Key->getValue(KeyStorage);
420    if (KeyValue.equals("source")) {
421      std::string Error;
422
423      Source = std::string(Value->getValue(ValueStorage));
424      if (!Regex(Source).isValid(Error)) {
425        YS.printError(Field.getKey(), "invalid regex: " + Error);
426        return false;
427      }
428    } else if (KeyValue.equals("target")) {
429      Target = std::string(Value->getValue(ValueStorage));
430    } else if (KeyValue.equals("transform")) {
431      Transform = std::string(Value->getValue(ValueStorage));
432    } else {
433      YS.printError(Field.getKey(), "unknown Key for Global Variable");
434      return false;
435    }
436  }
437
438  if (Transform.empty() == Target.empty()) {
439    YS.printError(Descriptor,
440                  "exactly one of transform or target must be specified");
441    return false;
442  }
443
444  if (!Target.empty())
445    DL->push_back(std::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
446        Source, Target,
447        /*Naked*/ false));
448  else
449    DL->push_back(std::make_unique<PatternRewriteGlobalVariableDescriptor>(
450        Source, Transform));
451
452  return true;
453}
454
455bool RewriteMapParser::
456parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
457                                  yaml::MappingNode *Descriptor,
458                                  RewriteDescriptorList *DL) {
459  std::string Source;
460  std::string Target;
461  std::string Transform;
462
463  for (auto &Field : *Descriptor) {
464    yaml::ScalarNode *Key;
465    yaml::ScalarNode *Value;
466    SmallString<32> KeyStorage;
467    SmallString<32> ValueStorage;
468    StringRef KeyValue;
469
470    Key = dyn_cast<yaml::ScalarNode>(Field.getKey());
471    if (!Key) {
472      YS.printError(Field.getKey(), "descriptor key must be a scalar");
473      return false;
474    }
475
476    Value = dyn_cast<yaml::ScalarNode>(Field.getValue());
477    if (!Value) {
478      YS.printError(Field.getValue(), "descriptor value must be a scalar");
479      return false;
480    }
481
482    KeyValue = Key->getValue(KeyStorage);
483    if (KeyValue.equals("source")) {
484      std::string Error;
485
486      Source = std::string(Value->getValue(ValueStorage));
487      if (!Regex(Source).isValid(Error)) {
488        YS.printError(Field.getKey(), "invalid regex: " + Error);
489        return false;
490      }
491    } else if (KeyValue.equals("target")) {
492      Target = std::string(Value->getValue(ValueStorage));
493    } else if (KeyValue.equals("transform")) {
494      Transform = std::string(Value->getValue(ValueStorage));
495    } else {
496      YS.printError(Field.getKey(), "unknown key for Global Alias");
497      return false;
498    }
499  }
500
501  if (Transform.empty() == Target.empty()) {
502    YS.printError(Descriptor,
503                  "exactly one of transform or target must be specified");
504    return false;
505  }
506
507  if (!Target.empty())
508    DL->push_back(std::make_unique<ExplicitRewriteNamedAliasDescriptor>(
509        Source, Target,
510        /*Naked*/ false));
511  else
512    DL->push_back(std::make_unique<PatternRewriteNamedAliasDescriptor>(
513        Source, Transform));
514
515  return true;
516}
517
518PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
519  if (!runImpl(M))
520    return PreservedAnalyses::all();
521
522  return PreservedAnalyses::none();
523}
524
525bool RewriteSymbolPass::runImpl(Module &M) {
526  bool Changed;
527
528  Changed = false;
529  for (auto &Descriptor : Descriptors)
530    Changed |= Descriptor->performOnModule(M);
531
532  return Changed;
533}
534
535void RewriteSymbolPass::loadAndParseMapFiles() {
536  const std::vector<std::string> MapFiles(RewriteMapFiles);
537  SymbolRewriter::RewriteMapParser Parser;
538
539  for (const auto &MapFile : MapFiles)
540    Parser.parse(MapFile, &Descriptors);
541}
542