SpecialCaseList.cpp revision 263508
1//===-- SpecialCaseList.cpp - special case list for sanitizers ------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This is a utility class for instrumentation passes (like AddressSanitizer 11// or ThreadSanitizer) to avoid instrumenting some functions or global 12// variables, or to instrument some functions or global variables in a specific 13// way, based on a user-supplied list. 14// 15//===----------------------------------------------------------------------===// 16 17#include "llvm/Transforms/Utils/SpecialCaseList.h" 18#include "llvm/ADT/OwningPtr.h" 19#include "llvm/ADT/SmallVector.h" 20#include "llvm/ADT/STLExtras.h" 21#include "llvm/ADT/StringExtras.h" 22#include "llvm/ADT/StringSet.h" 23#include "llvm/IR/DerivedTypes.h" 24#include "llvm/IR/Function.h" 25#include "llvm/IR/GlobalVariable.h" 26#include "llvm/IR/Module.h" 27#include "llvm/Support/MemoryBuffer.h" 28#include "llvm/Support/Regex.h" 29#include "llvm/Support/raw_ostream.h" 30#include "llvm/Support/system_error.h" 31#include <string> 32#include <utility> 33 34namespace llvm { 35 36/// Represents a set of regular expressions. Regular expressions which are 37/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all 38/// others are represented as a single pipe-separated regex in RegEx. The 39/// reason for doing so is efficiency; StringSet is much faster at matching 40/// literal strings than Regex. 41struct SpecialCaseList::Entry { 42 StringSet<> Strings; 43 Regex *RegEx; 44 45 Entry() : RegEx(0) {} 46 47 bool match(StringRef Query) const { 48 return Strings.count(Query) || (RegEx && RegEx->match(Query)); 49 } 50}; 51 52SpecialCaseList::SpecialCaseList() : Entries() {} 53 54SpecialCaseList *SpecialCaseList::create( 55 const StringRef Path, std::string &Error) { 56 if (Path.empty()) 57 return new SpecialCaseList(); 58 OwningPtr<MemoryBuffer> File; 59 if (error_code EC = MemoryBuffer::getFile(Path, File)) { 60 Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); 61 return 0; 62 } 63 return create(File.get(), Error); 64} 65 66SpecialCaseList *SpecialCaseList::create( 67 const MemoryBuffer *MB, std::string &Error) { 68 OwningPtr<SpecialCaseList> SCL(new SpecialCaseList()); 69 if (!SCL->parse(MB, Error)) 70 return 0; 71 return SCL.take(); 72} 73 74SpecialCaseList *SpecialCaseList::createOrDie(const StringRef Path) { 75 std::string Error; 76 if (SpecialCaseList *SCL = create(Path, Error)) 77 return SCL; 78 report_fatal_error(Error); 79} 80 81bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { 82 // Iterate through each line in the blacklist file. 83 SmallVector<StringRef, 16> Lines; 84 SplitString(MB->getBuffer(), Lines, "\n\r"); 85 StringMap<StringMap<std::string> > Regexps; 86 assert(Entries.empty() && 87 "parse() should be called on an empty SpecialCaseList"); 88 int LineNo = 1; 89 for (SmallVectorImpl<StringRef>::iterator I = Lines.begin(), E = Lines.end(); 90 I != E; ++I, ++LineNo) { 91 // Ignore empty lines and lines starting with "#" 92 if (I->empty() || I->startswith("#")) 93 continue; 94 // Get our prefix and unparsed regexp. 95 std::pair<StringRef, StringRef> SplitLine = I->split(":"); 96 StringRef Prefix = SplitLine.first; 97 if (SplitLine.second.empty()) { 98 // Missing ':' in the line. 99 Error = (Twine("Malformed line ") + Twine(LineNo) + ": '" + 100 SplitLine.first + "'").str(); 101 return false; 102 } 103 104 std::pair<StringRef, StringRef> SplitRegexp = SplitLine.second.split("="); 105 std::string Regexp = SplitRegexp.first; 106 StringRef Category = SplitRegexp.second; 107 108 // Backwards compatibility. 109 if (Prefix == "global-init") { 110 Prefix = "global"; 111 Category = "init"; 112 } else if (Prefix == "global-init-type") { 113 Prefix = "type"; 114 Category = "init"; 115 } else if (Prefix == "global-init-src") { 116 Prefix = "src"; 117 Category = "init"; 118 } 119 120 // See if we can store Regexp in Strings. 121 if (Regex::isLiteralERE(Regexp)) { 122 Entries[Prefix][Category].Strings.insert(Regexp); 123 continue; 124 } 125 126 // Replace * with .* 127 for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos; 128 pos += strlen(".*")) { 129 Regexp.replace(pos, strlen("*"), ".*"); 130 } 131 132 // Check that the regexp is valid. 133 Regex CheckRE(Regexp); 134 std::string REError; 135 if (!CheckRE.isValid(REError)) { 136 Error = (Twine("Malformed regex in line ") + Twine(LineNo) + ": '" + 137 SplitLine.second + "': " + REError).str(); 138 return false; 139 } 140 141 // Add this regexp into the proper group by its prefix. 142 if (!Regexps[Prefix][Category].empty()) 143 Regexps[Prefix][Category] += "|"; 144 Regexps[Prefix][Category] += "^" + Regexp + "$"; 145 } 146 147 // Iterate through each of the prefixes, and create Regexs for them. 148 for (StringMap<StringMap<std::string> >::const_iterator I = Regexps.begin(), 149 E = Regexps.end(); 150 I != E; ++I) { 151 for (StringMap<std::string>::const_iterator II = I->second.begin(), 152 IE = I->second.end(); 153 II != IE; ++II) { 154 Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); 155 } 156 } 157 return true; 158} 159 160SpecialCaseList::~SpecialCaseList() { 161 for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), 162 E = Entries.end(); 163 I != E; ++I) { 164 for (StringMap<Entry>::const_iterator II = I->second.begin(), 165 IE = I->second.end(); 166 II != IE; ++II) { 167 delete II->second.RegEx; 168 } 169 } 170} 171 172bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { 173 return isIn(*F.getParent(), Category) || 174 inSectionCategory("fun", F.getName(), Category); 175} 176 177static StringRef GetGlobalTypeString(const GlobalValue &G) { 178 // Types of GlobalVariables are always pointer types. 179 Type *GType = G.getType()->getElementType(); 180 // For now we support blacklisting struct types only. 181 if (StructType *SGType = dyn_cast<StructType>(GType)) { 182 if (!SGType->isLiteral()) 183 return SGType->getName(); 184 } 185 return "<unknown type>"; 186} 187 188bool SpecialCaseList::isIn(const GlobalVariable &G, 189 const StringRef Category) const { 190 return isIn(*G.getParent(), Category) || 191 inSectionCategory("global", G.getName(), Category) || 192 inSectionCategory("type", GetGlobalTypeString(G), Category); 193} 194 195bool SpecialCaseList::isIn(const GlobalAlias &GA, 196 const StringRef Category) const { 197 if (isIn(*GA.getParent(), Category)) 198 return true; 199 200 if (isa<FunctionType>(GA.getType()->getElementType())) 201 return inSectionCategory("fun", GA.getName(), Category); 202 203 return inSectionCategory("global", GA.getName(), Category) || 204 inSectionCategory("type", GetGlobalTypeString(GA), Category); 205} 206 207bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { 208 return inSectionCategory("src", M.getModuleIdentifier(), Category); 209} 210 211bool SpecialCaseList::inSectionCategory(const StringRef Section, 212 const StringRef Query, 213 const StringRef Category) const { 214 StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); 215 if (I == Entries.end()) return false; 216 StringMap<Entry>::const_iterator II = I->second.find(Category); 217 if (II == I->second.end()) return false; 218 219 return II->getValue().match(Query); 220} 221 222} // namespace llvm 223