1259698Sdim//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===//
2259698Sdim//
3259698Sdim//                     The LLVM Compiler Infrastructure
4259698Sdim//
5259698Sdim// This file is distributed under the University of Illinois Open Source
6259698Sdim// License. See LICENSE.TXT for details.
7259698Sdim//
8259698Sdim//===----------------------------------------------------------------------===//
9259698Sdim//
10259698Sdim// This is a utility class for instrumentation passes (like AddressSanitizer
11259698Sdim// or ThreadSanitizer) to avoid instrumenting some functions or global
12259698Sdim// variables, or to instrument some functions or global variables in a specific
13259698Sdim// way, based on a user-supplied list.
14259698Sdim//
15259698Sdim//===----------------------------------------------------------------------===//
16259698Sdim
17259698Sdim#include "llvm/Transforms/Utils/SpecialCaseList.h"
18259698Sdim#include "llvm/ADT/OwningPtr.h"
19259698Sdim#include "llvm/ADT/SmallVector.h"
20259698Sdim#include "llvm/ADT/STLExtras.h"
21259698Sdim#include "llvm/ADT/StringExtras.h"
22259698Sdim#include "llvm/ADT/StringSet.h"
23259698Sdim#include "llvm/IR/DerivedTypes.h"
24259698Sdim#include "llvm/IR/Function.h"
25259698Sdim#include "llvm/IR/GlobalVariable.h"
26259698Sdim#include "llvm/IR/Module.h"
27259698Sdim#include "llvm/Support/MemoryBuffer.h"
28259698Sdim#include "llvm/Support/Regex.h"
29259698Sdim#include "llvm/Support/raw_ostream.h"
30259698Sdim#include "llvm/Support/system_error.h"
31259698Sdim#include <string>
32259698Sdim#include <utility>
33259698Sdim
34259698Sdimnamespace llvm {
35259698Sdim
36259698Sdim/// Represents a set of regular expressions.  Regular expressions which are
37259698Sdim/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
38259698Sdim/// others are represented as a single pipe-separated regex in RegEx.  The
39259698Sdim/// reason for doing so is efficiency; StringSet is much faster at matching
40259698Sdim/// literal strings than Regex.
41259698Sdimstruct SpecialCaseList::Entry {
42259698Sdim  StringSet<> Strings;
43259698Sdim  Regex *RegEx;
44259698Sdim
45259698Sdim  Entry() : RegEx(0) {}
46259698Sdim
47259698Sdim  bool match(StringRef Query) const {
48259698Sdim    return Strings.count(Query) || (RegEx && RegEx->match(Query));
49259698Sdim  }
50259698Sdim};
51259698Sdim
52259698SdimSpecialCaseList::SpecialCaseList() : Entries() {}
53259698Sdim
54259698SdimSpecialCaseList *SpecialCaseList::create(
55259698Sdim    const StringRef Path, std::string &Error) {
56259698Sdim  if (Path.empty())
57259698Sdim    return new SpecialCaseList();
58259698Sdim  OwningPtr<MemoryBuffer> File;
59259698Sdim  if (error_code EC = MemoryBuffer::getFile(Path, File)) {
60259698Sdim    Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
61259698Sdim    return 0;
62259698Sdim  }
63259698Sdim  return create(File.get(), Error);
64259698Sdim}
65259698Sdim
66259698SdimSpecialCaseList *SpecialCaseList::create(
67259698Sdim    const MemoryBuffer *MB, std::string &Error) {
68259698Sdim  OwningPtr<SpecialCaseList> SCL(new SpecialCaseList());
69259698Sdim  if (!SCL->parse(MB, Error))
70259698Sdim    return 0;
71259698Sdim  return SCL.take();
72259698Sdim}
73259698Sdim
74259698SdimSpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) {
75259698Sdim  std::string Error;
76259698Sdim  if (SpecialCaseList *SCL = create(Path, Error))
77259698Sdim    return SCL;
78259698Sdim  report_fatal_error(Error);
79259698Sdim}
80259698Sdim
81259698Sdimbool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
82259698Sdim  // Iterate through each line in the blacklist file.
83259698Sdim  SmallVector<StringRef, 16> Lines;
84259698Sdim  SplitString(MB->getBuffer(), Lines, "\n\r");
85259698Sdim  StringMap<StringMap<std::string> > Regexps;
86259698Sdim  assert(Entries.empty() &&
87259698Sdim         "parse() should be called on an empty SpecialCaseList");
88259698Sdim  int LineNo = 1;
89259698Sdim  for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end();
90259698Sdim       I != E; ++I, ++LineNo) {
91259698Sdim    // Ignore empty lines and lines starting with "#"
92259698Sdim    if (I->empty() || I->startswith("#"))
93259698Sdim      continue;
94259698Sdim    // Get our prefix and unparsed regexp.
95259698Sdim    std::pair<StringRef, StringRef> SplitLine = I->split(":");
96259698Sdim    StringRef Prefix = SplitLine.first;
97259698Sdim    if (SplitLine.second.empty()) {
98259698Sdim      // Missing ':' in the line.
99259698Sdim      Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" +
100259698Sdim               SplitLine.first + "'").str();
101259698Sdim      return false;
102259698Sdim    }
103259698Sdim
104259698Sdim    std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("=");
105259698Sdim    std::string Regexp = SplitRegexp.first;
106259698Sdim    StringRef Category = SplitRegexp.second;
107259698Sdim
108259698Sdim    // Backwards compatibility.
109259698Sdim    if (Prefix == "global-init") {
110259698Sdim      Prefix = "global";
111259698Sdim      Category = "init";
112259698Sdim    } else if (Prefix == "global-init-type") {
113259698Sdim      Prefix = "type";
114259698Sdim      Category = "init";
115259698Sdim    } else if (Prefix == "global-init-src") {
116259698Sdim      Prefix = "src";
117259698Sdim      Category = "init";
118259698Sdim    }
119259698Sdim
120259698Sdim    // See if we can store Regexp in Strings.
121259698Sdim    if (Regex::isLiteralERE(Regexp)) {
122259698Sdim      Entries[Prefix][Category].Strings.insert(Regexp);
123259698Sdim      continue;
124259698Sdim    }
125259698Sdim
126259698Sdim    // Replace * with .*
127259698Sdim    for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
128259698Sdim         pos += strlen(".*")) {
129259698Sdim      Regexp.replace(pos, strlen("*"), ".*");
130259698Sdim    }
131259698Sdim
132259698Sdim    // Check that the regexp is valid.
133259698Sdim    Regex CheckRE(Regexp);
134259698Sdim    std::string REError;
135259698Sdim    if (!CheckRE.isValid(REError)) {
136259698Sdim      Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" +
137259698Sdim               SplitLine.second + "': " + REError).str();
138259698Sdim      return false;
139259698Sdim    }
140259698Sdim
141259698Sdim    // Add this regexp into the proper group by its prefix.
142259698Sdim    if (!Regexps[Prefix][Category].empty())
143259698Sdim      Regexps[Prefix][Category] += "|";
144259698Sdim    Regexps[Prefix][Category] += "^" + Regexp + "$";
145259698Sdim  }
146259698Sdim
147259698Sdim  // Iterate through each of the prefixes, and create Regexs for them.
148259698Sdim  for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(),
149259698Sdim                                                          E = Regexps.end();
150259698Sdim       I != E; ++I) {
151259698Sdim    for (StringMap<std::string>::const_iterator II = I->second.begin(),
152259698Sdim                                                IE = I->second.end();
153259698Sdim         II != IE; ++II) {
154259698Sdim      Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
155259698Sdim    }
156259698Sdim  }
157259698Sdim  return true;
158259698Sdim}
159259698Sdim
160259698SdimSpecialCaseList::~SpecialCaseList() {
161259698Sdim  for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
162259698Sdim                                              E = Entries.end();
163259698Sdim       I != E; ++I) {
164259698Sdim    for (StringMap<Entry>::const_iterator II = I->second.begin(),
165259698Sdim                                          IE = I->second.end();
166259698Sdim         II != IE; ++II) {
167259698Sdim      delete II->second.RegEx;
168259698Sdim    }
169259698Sdim  }
170259698Sdim}
171259698Sdim
172259698Sdimbool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
173259698Sdim  return isIn(*F.getParent(), Category) ||
174259698Sdim         inSectionCategory("fun", F.getName(), Category);
175259698Sdim}
176259698Sdim
177259698Sdimstatic StringRef GetGlobalTypeString(const GlobalValue &G) {
178259698Sdim  // Types of GlobalVariables are always pointer types.
179259698Sdim  Type *GType = G.getType()->getElementType();
180259698Sdim  // For now we support blacklisting struct types only.
181259698Sdim  if (StructType *SGType = dyn_cast<StructType>(GType)) {
182259698Sdim    if (!SGType->isLiteral())
183259698Sdim      return SGType->getName();
184259698Sdim  }
185259698Sdim  return "<unknown type>";
186259698Sdim}
187259698Sdim
188259698Sdimbool SpecialCaseList::isIn(const GlobalVariable &G,
189259698Sdim                           const StringRef Category) const {
190259698Sdim  return isIn(*G.getParent(), Category) ||
191259698Sdim         inSectionCategory("global", G.getName(), Category) ||
192259698Sdim         inSectionCategory("type", GetGlobalTypeString(G), Category);
193259698Sdim}
194259698Sdim
195259698Sdimbool SpecialCaseList::isIn(const GlobalAlias &GA,
196259698Sdim                           const StringRef Category) const {
197259698Sdim  if (isIn(*GA.getParent(), Category))
198259698Sdim    return true;
199259698Sdim
200259698Sdim  if (isa<FunctionType>(GA.getType()->getElementType()))
201259698Sdim    return inSectionCategory("fun", GA.getName(), Category);
202259698Sdim
203259698Sdim  return inSectionCategory("global", GA.getName(), Category) ||
204259698Sdim         inSectionCategory("type", GetGlobalTypeString(GA), Category);
205259698Sdim}
206259698Sdim
207259698Sdimbool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
208259698Sdim  return inSectionCategory("src", M.getModuleIdentifier(), Category);
209259698Sdim}
210259698Sdim
211259698Sdimbool SpecialCaseList::inSectionCategory(const StringRef Section,
212259698Sdim                                        const StringRef Query,
213259698Sdim                                        const StringRef Category) const {
214259698Sdim  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
215259698Sdim  if (I == Entries.end()) return false;
216259698Sdim  StringMap<Entry>::const_iterator II = I->second.find(Category);
217259698Sdim  if (II == I->second.end()) return false;
218259698Sdim
219259698Sdim  return II->getValue().match(Query);
220259698Sdim}
221259698Sdim
222259698Sdim}  // namespace llvm
223