1259698Sdim//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// 2259698Sdim// 3259698Sdim// The LLVM Compiler Infrastructure 4259698Sdim// 5259698Sdim// This file is distributed under the University of Illinois Open Source 6259698Sdim// License. See LICENSE.TXT for details. 7259698Sdim// 8259698Sdim//===----------------------------------------------------------------------===// 9259698Sdim// 10259698Sdim// This is a utility class for instrumentation passes (like AddressSanitizer 11259698Sdim// or ThreadSanitizer) to avoid instrumenting some functions or global 12259698Sdim// variables, or to instrument some functions or global variables in a specific 13259698Sdim// way, based on a user-supplied list. 14259698Sdim// 15259698Sdim//===----------------------------------------------------------------------===// 16259698Sdim 17259698Sdim#include "llvm/Transforms/Utils/SpecialCaseList.h" 18259698Sdim#include "llvm/ADT/OwningPtr.h" 19259698Sdim#include "llvm/ADT/SmallVector.h" 20259698Sdim#include "llvm/ADT/STLExtras.h" 21259698Sdim#include "llvm/ADT/StringExtras.h" 22259698Sdim#include "llvm/ADT/StringSet.h" 23259698Sdim#include "llvm/IR/DerivedTypes.h" 24259698Sdim#include "llvm/IR/Function.h" 25259698Sdim#include "llvm/IR/GlobalVariable.h" 26259698Sdim#include "llvm/IR/Module.h" 27259698Sdim#include "llvm/Support/MemoryBuffer.h" 28259698Sdim#include "llvm/Support/Regex.h" 29259698Sdim#include "llvm/Support/raw_ostream.h" 30259698Sdim#include "llvm/Support/system_error.h" 31259698Sdim#include <string> 32259698Sdim#include <utility> 33259698Sdim 34259698Sdimnamespace llvm { 35259698Sdim 36259698Sdim/// Represents a set of regular expressions. Regular expressions which are 37259698Sdim/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all 38259698Sdim/// others are represented as a single pipe-separated regex in RegEx. The 39259698Sdim/// reason for doing so is efficiency; StringSet is much faster at matching 40259698Sdim/// literal strings than Regex. 41259698Sdimstruct SpecialCaseList::Entry { 42259698Sdim StringSet<> Strings; 43259698Sdim Regex *RegEx; 44259698Sdim 45259698Sdim Entry() : RegEx(0) {} 46259698Sdim 47259698Sdim bool match(StringRef Query) const { 48259698Sdim return Strings.count(Query) || (RegEx && RegEx->match(Query)); 49259698Sdim } 50259698Sdim}; 51259698Sdim 52259698SdimSpecialCaseList::SpecialCaseList() : Entries() {} 53259698Sdim 54259698SdimSpecialCaseList *SpecialCaseList::create( 55259698Sdim const StringRef Path, std::string &Error) { 56259698Sdim if (Path.empty()) 57259698Sdim return new SpecialCaseList(); 58259698Sdim OwningPtr<MemoryBuffer> File; 59259698Sdim if (error_code EC = MemoryBuffer::getFile(Path, File)) { 60259698Sdim Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); 61259698Sdim return 0; 62259698Sdim } 63259698Sdim return create(File.get(), Error); 64259698Sdim} 65259698Sdim 66259698SdimSpecialCaseList *SpecialCaseList::create( 67259698Sdim const MemoryBuffer *MB, std::string &Error) { 68259698Sdim OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); 69259698Sdim if (!SCL->parse(MB, Error)) 70259698Sdim return 0; 71259698Sdim return SCL.take(); 72259698Sdim} 73259698Sdim 74259698SdimSpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { 75259698Sdim std::string Error; 76259698Sdim if (SpecialCaseList *SCL = create(Path, Error)) 77259698Sdim return SCL; 78259698Sdim report_fatal_error(Error); 79259698Sdim} 80259698Sdim 81259698Sdimbool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { 82259698Sdim // Iterate through each line in the blacklist file. 83259698Sdim SmallVector<StringRef, 16> Lines; 84259698Sdim SplitString(MB->getBuffer(), Lines, "\n\r"); 85259698Sdim StringMap<StringMap<std::string> > Regexps; 86259698Sdim assert(Entries.empty() && 87259698Sdim "parse() should be called on an empty SpecialCaseList"); 88259698Sdim int LineNo = 1; 89259698Sdim for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); 90259698Sdim I != E; ++I, ++LineNo) { 91259698Sdim // Ignore empty lines and lines starting with "#" 92259698Sdim if (I->empty() || I->startswith("#")) 93259698Sdim continue; 94259698Sdim // Get our prefix and unparsed regexp. 95259698Sdim std::pair<StringRef, StringRef> SplitLine = I->split(":"); 96259698Sdim StringRef Prefix = SplitLine.first; 97259698Sdim if (SplitLine.second.empty()) { 98259698Sdim // Missing ':' in the line. 99259698Sdim Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + 100259698Sdim SplitLine.first + "'").str(); 101259698Sdim return false; 102259698Sdim } 103259698Sdim 104259698Sdim std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); 105259698Sdim std::string Regexp = SplitRegexp.first; 106259698Sdim StringRef Category = SplitRegexp.second; 107259698Sdim 108259698Sdim // Backwards compatibility. 109259698Sdim if (Prefix == "global-init") { 110259698Sdim Prefix = "global"; 111259698Sdim Category = "init"; 112259698Sdim } else if (Prefix == "global-init-type") { 113259698Sdim Prefix = "type"; 114259698Sdim Category = "init"; 115259698Sdim } else if (Prefix == "global-init-src") { 116259698Sdim Prefix = "src"; 117259698Sdim Category = "init"; 118259698Sdim } 119259698Sdim 120259698Sdim // See if we can store Regexp in Strings. 121259698Sdim if (Regex::isLiteralERE(Regexp)) { 122259698Sdim Entries[Prefix][Category].Strings.insert(Regexp); 123259698Sdim continue; 124259698Sdim } 125259698Sdim 126259698Sdim // Replace * with .* 127259698Sdim for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; 128259698Sdim pos += strlen(".*")) { 129259698Sdim Regexp.replace(pos, strlen("*"), ".*"); 130259698Sdim } 131259698Sdim 132259698Sdim // Check that the regexp is valid. 133259698Sdim Regex CheckRE(Regexp); 134259698Sdim std::string REError; 135259698Sdim if (!CheckRE.isValid(REError)) { 136259698Sdim Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + 137259698Sdim SplitLine.second + "': " + REError).str(); 138259698Sdim return false; 139259698Sdim } 140259698Sdim 141259698Sdim // Add this regexp into the proper group by its prefix. 142259698Sdim if (!Regexps[Prefix][Category].empty()) 143259698Sdim Regexps[Prefix][Category] += "|"; 144259698Sdim Regexps[Prefix][Category] += "^" + Regexp + "$"; 145259698Sdim } 146259698Sdim 147259698Sdim // Iterate through each of the prefixes, and create Regexs for them. 148259698Sdim for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), 149259698Sdim E = Regexps.end(); 150259698Sdim I != E; ++I) { 151259698Sdim for (StringMap<std::string>::const_iterator II = I->second.begin(), 152259698Sdim IE = I->second.end(); 153259698Sdim II != IE; ++II) { 154259698Sdim Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); 155259698Sdim } 156259698Sdim } 157259698Sdim return true; 158259698Sdim} 159259698Sdim 160259698SdimSpecialCaseList::~SpecialCaseList() { 161259698Sdim for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), 162259698Sdim E = Entries.end(); 163259698Sdim I != E; ++I) { 164259698Sdim for (StringMap<Entry>::const_iterator II = I->second.begin(), 165259698Sdim IE = I->second.end(); 166259698Sdim II != IE; ++II) { 167259698Sdim delete II->second.RegEx; 168259698Sdim } 169259698Sdim } 170259698Sdim} 171259698Sdim 172259698Sdimbool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { 173259698Sdim return isIn(*F.getParent(), Category) || 174259698Sdim inSectionCategory("fun", F.getName(), Category); 175259698Sdim} 176259698Sdim 177259698Sdimstatic StringRef GetGlobalTypeString(const GlobalValue &G) { 178259698Sdim // Types of GlobalVariables are always pointer types. 179259698Sdim Type *GType = G.getType()->getElementType(); 180259698Sdim // For now we support blacklisting struct types only. 181259698Sdim if (StructType *SGType = dyn_cast<StructType>(GType)) { 182259698Sdim if (!SGType->isLiteral()) 183259698Sdim return SGType->getName(); 184259698Sdim } 185259698Sdim return "<unknown type>"; 186259698Sdim} 187259698Sdim 188259698Sdimbool SpecialCaseList::isIn(const GlobalVariable &G, 189259698Sdim const StringRef Category) const { 190259698Sdim return isIn(*G.getParent(), Category) || 191259698Sdim inSectionCategory("global", G.getName(), Category) || 192259698Sdim inSectionCategory("type", GetGlobalTypeString(G), Category); 193259698Sdim} 194259698Sdim 195259698Sdimbool SpecialCaseList::isIn(const GlobalAlias &GA, 196259698Sdim const StringRef Category) const { 197259698Sdim if (isIn(*GA.getParent(), Category)) 198259698Sdim return true; 199259698Sdim 200259698Sdim if (isa<FunctionType>(GA.getType()->getElementType())) 201259698Sdim return inSectionCategory("fun", GA.getName(), Category); 202259698Sdim 203259698Sdim return inSectionCategory("global", GA.getName(), Category) || 204259698Sdim inSectionCategory("type", GetGlobalTypeString(GA), Category); 205259698Sdim} 206259698Sdim 207259698Sdimbool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { 208259698Sdim return inSectionCategory("src", M.getModuleIdentifier(), Category); 209259698Sdim} 210259698Sdim 211259698Sdimbool SpecialCaseList::inSectionCategory(const StringRef Section, 212259698Sdim const StringRef Query, 213259698Sdim const StringRef Category) const { 214259698Sdim StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); 215259698Sdim if (I == Entries.end()) return false; 216259698Sdim StringMap<Entry>::const_iterator II = I->second.find(Category); 217259698Sdim if (II == I->second.end()) return false; 218259698Sdim 219259698Sdim return II->getValue().match(Query); 220259698Sdim} 221259698Sdim 222259698Sdim} // namespace llvm 223