1259698Sdim//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===// 2259698Sdim// 3259698Sdim// The LLVM Compiler Infrastructure 4259698Sdim// 5259698Sdim// This file is distributed under the University of Illinois Open Source 6259698Sdim// License. See LICENSE.TXT for details. 7259698Sdim// 8259698Sdim//===----------------------------------------------------------------------===// 9259698Sdim// 10259698Sdim// This file implements the SampleProfileLoader transformation. This pass 11259698Sdim// reads a profile file generated by a sampling profiler (e.g. Linux Perf - 12259698Sdim// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the 13259698Sdim// profile information in the given profile. 14259698Sdim// 15259698Sdim// This pass generates branch weight annotations on the IR: 16259698Sdim// 17259698Sdim// - prof: Represents branch weights. This annotation is added to branches 18259698Sdim// to indicate the weights of each edge coming out of the branch. 19259698Sdim// The weight of each edge is the weight of the target block for 20259698Sdim// that edge. The weight of a block B is computed as the maximum 21259698Sdim// number of samples found in B. 22259698Sdim// 23259698Sdim//===----------------------------------------------------------------------===// 24259698Sdim 25259698Sdim#define DEBUG_TYPE "sample-profile" 26259698Sdim 27259698Sdim#include "llvm/ADT/DenseMap.h" 28259698Sdim#include "llvm/ADT/OwningPtr.h" 29259698Sdim#include "llvm/ADT/StringMap.h" 30259698Sdim#include "llvm/ADT/StringRef.h" 31259698Sdim#include "llvm/DebugInfo/DIContext.h" 32259698Sdim#include "llvm/IR/Constants.h" 33259698Sdim#include "llvm/IR/Function.h" 34259698Sdim#include "llvm/IR/Instructions.h" 35259698Sdim#include "llvm/IR/LLVMContext.h" 36259698Sdim#include "llvm/IR/Metadata.h" 37259698Sdim#include "llvm/IR/MDBuilder.h" 38259698Sdim#include "llvm/IR/Module.h" 39259698Sdim#include "llvm/Pass.h" 40259698Sdim#include "llvm/Support/CommandLine.h" 41259698Sdim#include "llvm/Support/Debug.h" 42259698Sdim#include "llvm/Support/InstIterator.h" 43259698Sdim#include "llvm/Support/MemoryBuffer.h" 44259698Sdim#include "llvm/Support/Regex.h" 45259698Sdim#include "llvm/Support/raw_ostream.h" 46259698Sdim#include "llvm/Transforms/Scalar.h" 47259698Sdim 48259698Sdimusing namespace llvm; 49259698Sdim 50259698Sdim// Command line option to specify the file to read samples from. This is 51259698Sdim// mainly used for debugging. 52259698Sdimstatic cl::opt<std::string> SampleProfileFile( 53259698Sdim "sample-profile-file", cl::init(""), cl::value_desc("filename"), 54259698Sdim cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); 55259698Sdim 56259698Sdimnamespace { 57259698Sdim/// \brief Sample-based profile reader. 58259698Sdim/// 59259698Sdim/// Each profile contains sample counts for all the functions 60259698Sdim/// executed. Inside each function, statements are annotated with the 61259698Sdim/// collected samples on all the instructions associated with that 62259698Sdim/// statement. 63259698Sdim/// 64259698Sdim/// For this to produce meaningful data, the program needs to be 65259698Sdim/// compiled with some debug information (at minimum, line numbers: 66259698Sdim/// -gline-tables-only). Otherwise, it will be impossible to match IR 67259698Sdim/// instructions to the line numbers collected by the profiler. 68259698Sdim/// 69259698Sdim/// From the profile file, we are interested in collecting the 70259698Sdim/// following information: 71259698Sdim/// 72259698Sdim/// * A list of functions included in the profile (mangled names). 73259698Sdim/// 74259698Sdim/// * For each function F: 75259698Sdim/// 1. The total number of samples collected in F. 76259698Sdim/// 77259698Sdim/// 2. The samples collected at each line in F. To provide some 78259698Sdim/// protection against source code shuffling, line numbers should 79259698Sdim/// be relative to the start of the function. 80259698Sdimclass SampleProfile { 81259698Sdimpublic: 82259698Sdim SampleProfile(StringRef F) : Profiles(0), Filename(F) {} 83259698Sdim 84259698Sdim void dump(); 85259698Sdim void loadText(); 86259698Sdim void loadNative() { llvm_unreachable("not implemented"); } 87259698Sdim bool emitAnnotations(Function &F); 88259698Sdim void printFunctionProfile(raw_ostream &OS, StringRef FName); 89259698Sdim void dumpFunctionProfile(StringRef FName); 90259698Sdim 91259698Sdimprotected: 92259698Sdim typedef DenseMap<uint32_t, uint32_t> BodySampleMap; 93259698Sdim typedef DenseMap<BasicBlock *, uint32_t> BlockWeightMap; 94259698Sdim 95259698Sdim /// \brief Representation of the runtime profile for a function. 96259698Sdim /// 97259698Sdim /// This data structure contains the runtime profile for a given 98259698Sdim /// function. It contains the total number of samples collected 99259698Sdim /// in the function and a map of samples collected in every statement. 100259698Sdim struct FunctionProfile { 101259698Sdim /// \brief Total number of samples collected inside this function. 102259698Sdim /// 103259698Sdim /// Samples are cumulative, they include all the samples collected 104259698Sdim /// inside this function and all its inlined callees. 105259698Sdim unsigned TotalSamples; 106259698Sdim 107259698Sdim // \brief Total number of samples collected at the head of the function. 108259698Sdim unsigned TotalHeadSamples; 109259698Sdim 110259698Sdim /// \brief Map line offsets to collected samples. 111259698Sdim /// 112259698Sdim /// Each entry in this map contains the number of samples 113259698Sdim /// collected at the corresponding line offset. All line locations 114259698Sdim /// are an offset from the start of the function. 115259698Sdim BodySampleMap BodySamples; 116259698Sdim 117259698Sdim /// \brief Map basic blocks to their computed weights. 118259698Sdim /// 119259698Sdim /// The weight of a basic block is defined to be the maximum 120259698Sdim /// of all the instruction weights in that block. 121259698Sdim BlockWeightMap BlockWeights; 122259698Sdim }; 123259698Sdim 124259698Sdim uint32_t getInstWeight(Instruction &I, unsigned FirstLineno, 125259698Sdim BodySampleMap &BodySamples); 126259698Sdim uint32_t computeBlockWeight(BasicBlock *B, unsigned FirstLineno, 127259698Sdim BodySampleMap &BodySamples); 128259698Sdim 129259698Sdim /// \brief Map every function to its associated profile. 130259698Sdim /// 131259698Sdim /// The profile of every function executed at runtime is collected 132259698Sdim /// in the structure FunctionProfile. This maps function objects 133259698Sdim /// to their corresponding profiles. 134259698Sdim StringMap<FunctionProfile> Profiles; 135259698Sdim 136259698Sdim /// \brief Path name to the file holding the profile data. 137259698Sdim /// 138259698Sdim /// The format of this file is defined by each profiler 139259698Sdim /// independently. If possible, the profiler should have a text 140259698Sdim /// version of the profile format to be used in constructing test 141259698Sdim /// cases and debugging. 142259698Sdim StringRef Filename; 143259698Sdim}; 144259698Sdim 145259698Sdim/// \brief Loader class for text-based profiles. 146259698Sdim/// 147259698Sdim/// This class defines a simple interface to read text files containing 148259698Sdim/// profiles. It keeps track of line number information and location of 149259698Sdim/// the file pointer. Users of this class are responsible for actually 150259698Sdim/// parsing the lines returned by the readLine function. 151259698Sdim/// 152259698Sdim/// TODO - This does not really belong here. It is a generic text file 153259698Sdim/// reader. It should be moved to the Support library and made more general. 154259698Sdimclass ExternalProfileTextLoader { 155259698Sdimpublic: 156259698Sdim ExternalProfileTextLoader(StringRef F) : Filename(F) { 157259698Sdim error_code EC; 158259698Sdim EC = MemoryBuffer::getFile(Filename, Buffer); 159259698Sdim if (EC) 160259698Sdim report_fatal_error("Could not open profile file " + Filename + ": " + 161259698Sdim EC.message()); 162259698Sdim FP = Buffer->getBufferStart(); 163259698Sdim Lineno = 0; 164259698Sdim } 165259698Sdim 166259698Sdim /// \brief Read a line from the mapped file. 167259698Sdim StringRef readLine() { 168259698Sdim size_t Length = 0; 169259698Sdim const char *start = FP; 170259698Sdim while (FP != Buffer->getBufferEnd() && *FP != '\n') { 171259698Sdim Length++; 172259698Sdim FP++; 173259698Sdim } 174259698Sdim if (FP != Buffer->getBufferEnd()) 175259698Sdim FP++; 176259698Sdim Lineno++; 177259698Sdim return StringRef(start, Length); 178259698Sdim } 179259698Sdim 180259698Sdim /// \brief Return true, if we've reached EOF. 181259698Sdim bool atEOF() const { return FP == Buffer->getBufferEnd(); } 182259698Sdim 183259698Sdim /// \brief Report a parse error message and stop compilation. 184259698Sdim void reportParseError(Twine Msg) const { 185259698Sdim report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n"); 186259698Sdim } 187259698Sdim 188259698Sdimprivate: 189259698Sdim /// \brief Memory buffer holding the text file. 190259698Sdim OwningPtr<MemoryBuffer> Buffer; 191259698Sdim 192259698Sdim /// \brief Current position into the memory buffer. 193259698Sdim const char *FP; 194259698Sdim 195259698Sdim /// \brief Current line number. 196259698Sdim int64_t Lineno; 197259698Sdim 198259698Sdim /// \brief Path name where to the profile file. 199259698Sdim StringRef Filename; 200259698Sdim}; 201259698Sdim 202259698Sdim/// \brief Sample profile pass. 203259698Sdim/// 204259698Sdim/// This pass reads profile data from the file specified by 205259698Sdim/// -sample-profile-file and annotates every affected function with the 206259698Sdim/// profile information found in that file. 207259698Sdimclass SampleProfileLoader : public FunctionPass { 208259698Sdimpublic: 209259698Sdim // Class identification, replacement for typeinfo 210259698Sdim static char ID; 211259698Sdim 212259698Sdim SampleProfileLoader(StringRef Name = SampleProfileFile) 213259698Sdim : FunctionPass(ID), Profiler(0), Filename(Name) { 214259698Sdim initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); 215259698Sdim } 216259698Sdim 217259698Sdim virtual bool doInitialization(Module &M); 218259698Sdim 219259698Sdim void dump() { Profiler->dump(); } 220259698Sdim 221259698Sdim virtual const char *getPassName() const { return "Sample profile pass"; } 222259698Sdim 223259698Sdim virtual bool runOnFunction(Function &F); 224259698Sdim 225259698Sdim virtual void getAnalysisUsage(AnalysisUsage &AU) const { 226259698Sdim AU.setPreservesCFG(); 227259698Sdim } 228259698Sdim 229259698Sdimprotected: 230259698Sdim /// \brief Profile reader object. 231259698Sdim OwningPtr<SampleProfile> Profiler; 232259698Sdim 233259698Sdim /// \brief Name of the profile file to load. 234259698Sdim StringRef Filename; 235259698Sdim}; 236259698Sdim} 237259698Sdim 238259698Sdim/// \brief Print the function profile for \p FName on stream \p OS. 239259698Sdim/// 240259698Sdim/// \param OS Stream to emit the output to. 241259698Sdim/// \param FName Name of the function to print. 242259698Sdimvoid SampleProfile::printFunctionProfile(raw_ostream &OS, StringRef FName) { 243259698Sdim FunctionProfile FProfile = Profiles[FName]; 244259698Sdim OS << "Function: " << FName << ", " << FProfile.TotalSamples << ", " 245259698Sdim << FProfile.TotalHeadSamples << ", " << FProfile.BodySamples.size() 246259698Sdim << " sampled lines\n"; 247259698Sdim for (BodySampleMap::const_iterator SI = FProfile.BodySamples.begin(), 248259698Sdim SE = FProfile.BodySamples.end(); 249259698Sdim SI != SE; ++SI) 250259698Sdim OS << "\tline offset: " << SI->first 251259698Sdim << ", number of samples: " << SI->second << "\n"; 252259698Sdim OS << "\n"; 253259698Sdim} 254259698Sdim 255259698Sdim/// \brief Dump the function profile for \p FName. 256259698Sdim/// 257259698Sdim/// \param FName Name of the function to print. 258259698Sdimvoid SampleProfile::dumpFunctionProfile(StringRef FName) { 259259698Sdim printFunctionProfile(dbgs(), FName); 260259698Sdim} 261259698Sdim 262259698Sdim/// \brief Dump all the function profiles found. 263259698Sdimvoid SampleProfile::dump() { 264259698Sdim for (StringMap<FunctionProfile>::const_iterator I = Profiles.begin(), 265259698Sdim E = Profiles.end(); 266259698Sdim I != E; ++I) 267259698Sdim dumpFunctionProfile(I->getKey()); 268259698Sdim} 269259698Sdim 270259698Sdim/// \brief Load samples from a text file. 271259698Sdim/// 272259698Sdim/// The file is divided in two segments: 273259698Sdim/// 274259698Sdim/// Symbol table (represented with the string "symbol table") 275259698Sdim/// Number of symbols in the table 276259698Sdim/// symbol 1 277259698Sdim/// symbol 2 278259698Sdim/// ... 279259698Sdim/// symbol N 280259698Sdim/// 281259698Sdim/// Function body profiles 282259698Sdim/// function1:total_samples:total_head_samples:number_of_locations 283259698Sdim/// location_offset_1: number_of_samples 284259698Sdim/// location_offset_2: number_of_samples 285259698Sdim/// ... 286259698Sdim/// location_offset_N: number_of_samples 287259698Sdim/// 288259698Sdim/// Function names must be mangled in order for the profile loader to 289259698Sdim/// match them in the current translation unit. 290259698Sdim/// 291259698Sdim/// Since this is a flat profile, a function that shows up more than 292259698Sdim/// once gets all its samples aggregated across all its instances. 293259698Sdim/// TODO - flat profiles are too imprecise to provide good optimization 294259698Sdim/// opportunities. Convert them to context-sensitive profile. 295259698Sdim/// 296259698Sdim/// This textual representation is useful to generate unit tests and 297259698Sdim/// for debugging purposes, but it should not be used to generate 298259698Sdim/// profiles for large programs, as the representation is extremely 299259698Sdim/// inefficient. 300259698Sdimvoid SampleProfile::loadText() { 301259698Sdim ExternalProfileTextLoader Loader(Filename); 302259698Sdim 303259698Sdim // Read the symbol table. 304259698Sdim StringRef Line = Loader.readLine(); 305259698Sdim if (Line != "symbol table") 306259698Sdim Loader.reportParseError("Expected 'symbol table', found " + Line); 307259698Sdim int NumSymbols; 308259698Sdim Line = Loader.readLine(); 309259698Sdim if (Line.getAsInteger(10, NumSymbols)) 310259698Sdim Loader.reportParseError("Expected a number, found " + Line); 311259698Sdim for (int I = 0; I < NumSymbols; I++) { 312259698Sdim StringRef FName = Loader.readLine(); 313259698Sdim FunctionProfile &FProfile = Profiles[FName]; 314259698Sdim FProfile.BodySamples.clear(); 315259698Sdim FProfile.TotalSamples = 0; 316259698Sdim FProfile.TotalHeadSamples = 0; 317259698Sdim } 318259698Sdim 319259698Sdim // Read the profile of each function. Since each function may be 320259698Sdim // mentioned more than once, and we are collecting flat profiles, 321259698Sdim // accumulate samples as we parse them. 322259698Sdim Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$"); 323259698Sdim Regex LineSample("^([0-9]+): ([0-9]+)$"); 324259698Sdim while (!Loader.atEOF()) { 325259698Sdim SmallVector<StringRef, 4> Matches; 326259698Sdim Line = Loader.readLine(); 327259698Sdim if (!HeadRE.match(Line, &Matches)) 328259698Sdim Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " + 329259698Sdim Line); 330259698Sdim assert(Matches.size() == 5); 331259698Sdim StringRef FName = Matches[1]; 332259698Sdim unsigned NumSamples, NumHeadSamples, NumSampledLines; 333259698Sdim Matches[2].getAsInteger(10, NumSamples); 334259698Sdim Matches[3].getAsInteger(10, NumHeadSamples); 335259698Sdim Matches[4].getAsInteger(10, NumSampledLines); 336259698Sdim FunctionProfile &FProfile = Profiles[FName]; 337259698Sdim FProfile.TotalSamples += NumSamples; 338259698Sdim FProfile.TotalHeadSamples += NumHeadSamples; 339259698Sdim BodySampleMap &SampleMap = FProfile.BodySamples; 340259698Sdim unsigned I; 341259698Sdim for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) { 342259698Sdim Line = Loader.readLine(); 343259698Sdim if (!LineSample.match(Line, &Matches)) 344259698Sdim Loader.reportParseError("Expected 'NUM: NUM', found " + Line); 345259698Sdim assert(Matches.size() == 3); 346259698Sdim unsigned LineOffset, NumSamples; 347259698Sdim Matches[1].getAsInteger(10, LineOffset); 348259698Sdim Matches[2].getAsInteger(10, NumSamples); 349259698Sdim SampleMap[LineOffset] += NumSamples; 350259698Sdim } 351259698Sdim 352259698Sdim if (I < NumSampledLines) 353259698Sdim Loader.reportParseError("Unexpected end of file"); 354259698Sdim } 355259698Sdim} 356259698Sdim 357259698Sdim/// \brief Get the weight for an instruction. 358259698Sdim/// 359259698Sdim/// The "weight" of an instruction \p Inst is the number of samples 360259698Sdim/// collected on that instruction at runtime. To retrieve it, we 361259698Sdim/// need to compute the line number of \p Inst relative to the start of its 362259698Sdim/// function. We use \p FirstLineno to compute the offset. We then 363259698Sdim/// look up the samples collected for \p Inst using \p BodySamples. 364259698Sdim/// 365259698Sdim/// \param Inst Instruction to query. 366259698Sdim/// \param FirstLineno Line number of the first instruction in the function. 367259698Sdim/// \param BodySamples Map of relative source line locations to samples. 368259698Sdim/// 369259698Sdim/// \returns The profiled weight of I. 370259698Sdimuint32_t SampleProfile::getInstWeight(Instruction &Inst, unsigned FirstLineno, 371259698Sdim BodySampleMap &BodySamples) { 372259698Sdim unsigned LOffset = Inst.getDebugLoc().getLine() - FirstLineno + 1; 373259698Sdim return BodySamples.lookup(LOffset); 374259698Sdim} 375259698Sdim 376259698Sdim/// \brief Compute the weight of a basic block. 377259698Sdim/// 378259698Sdim/// The weight of basic block \p B is the maximum weight of all the 379259698Sdim/// instructions in B. 380259698Sdim/// 381259698Sdim/// \param B The basic block to query. 382259698Sdim/// \param FirstLineno The line number for the first line in the 383259698Sdim/// function holding B. 384259698Sdim/// \param BodySamples The map containing all the samples collected in that 385259698Sdim/// function. 386259698Sdim/// 387259698Sdim/// \returns The computed weight of B. 388259698Sdimuint32_t SampleProfile::computeBlockWeight(BasicBlock *B, unsigned FirstLineno, 389259698Sdim BodySampleMap &BodySamples) { 390259698Sdim // If we've computed B's weight before, return it. 391259698Sdim Function *F = B->getParent(); 392259698Sdim FunctionProfile &FProfile = Profiles[F->getName()]; 393259698Sdim std::pair<BlockWeightMap::iterator, bool> Entry = 394259698Sdim FProfile.BlockWeights.insert(std::make_pair(B, 0)); 395259698Sdim if (!Entry.second) 396259698Sdim return Entry.first->second; 397259698Sdim 398259698Sdim // Otherwise, compute and cache B's weight. 399259698Sdim uint32_t Weight = 0; 400259698Sdim for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { 401259698Sdim uint32_t InstWeight = getInstWeight(*I, FirstLineno, BodySamples); 402259698Sdim if (InstWeight > Weight) 403259698Sdim Weight = InstWeight; 404259698Sdim } 405259698Sdim Entry.first->second = Weight; 406259698Sdim return Weight; 407259698Sdim} 408259698Sdim 409259698Sdim/// \brief Generate branch weight metadata for all branches in \p F. 410259698Sdim/// 411259698Sdim/// For every branch instruction B in \p F, we compute the weight of the 412259698Sdim/// target block for each of the edges out of B. This is the weight 413259698Sdim/// that we associate with that branch. 414259698Sdim/// 415259698Sdim/// TODO - This weight assignment will most likely be wrong if the 416259698Sdim/// target branch has more than two predecessors. This needs to be done 417259698Sdim/// using some form of flow propagation. 418259698Sdim/// 419259698Sdim/// Once all the branch weights are computed, we emit the MD_prof 420259698Sdim/// metadata on B using the computed values. 421259698Sdim/// 422259698Sdim/// \param F The function to query. 423259698Sdimbool SampleProfile::emitAnnotations(Function &F) { 424259698Sdim bool Changed = false; 425259698Sdim FunctionProfile &FProfile = Profiles[F.getName()]; 426259698Sdim unsigned FirstLineno = inst_begin(F)->getDebugLoc().getLine(); 427259698Sdim MDBuilder MDB(F.getContext()); 428259698Sdim 429259698Sdim // Clear the block weights cache. 430259698Sdim FProfile.BlockWeights.clear(); 431259698Sdim 432259698Sdim // When we find a branch instruction: For each edge E out of the branch, 433259698Sdim // the weight of E is the weight of the target block. 434259698Sdim for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { 435259698Sdim BasicBlock *B = I; 436259698Sdim TerminatorInst *TI = B->getTerminator(); 437259698Sdim if (TI->getNumSuccessors() == 1) 438259698Sdim continue; 439259698Sdim if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI)) 440259698Sdim continue; 441259698Sdim 442259698Sdim SmallVector<uint32_t, 4> Weights; 443259698Sdim unsigned NSuccs = TI->getNumSuccessors(); 444259698Sdim for (unsigned I = 0; I < NSuccs; ++I) { 445259698Sdim BasicBlock *Succ = TI->getSuccessor(I); 446259698Sdim uint32_t Weight = 447259698Sdim computeBlockWeight(Succ, FirstLineno, FProfile.BodySamples); 448259698Sdim Weights.push_back(Weight); 449259698Sdim } 450259698Sdim 451259698Sdim TI->setMetadata(llvm::LLVMContext::MD_prof, 452259698Sdim MDB.createBranchWeights(Weights)); 453259698Sdim Changed = true; 454259698Sdim } 455259698Sdim 456259698Sdim return Changed; 457259698Sdim} 458259698Sdim 459259698Sdimchar SampleProfileLoader::ID = 0; 460259698SdimINITIALIZE_PASS(SampleProfileLoader, "sample-profile", "Sample Profile loader", 461259698Sdim false, false) 462259698Sdim 463259698Sdimbool SampleProfileLoader::runOnFunction(Function &F) { 464259698Sdim return Profiler->emitAnnotations(F); 465259698Sdim} 466259698Sdim 467259698Sdimbool SampleProfileLoader::doInitialization(Module &M) { 468259698Sdim Profiler.reset(new SampleProfile(Filename)); 469259698Sdim Profiler->loadText(); 470259698Sdim return true; 471259698Sdim} 472259698Sdim 473259698SdimFunctionPass *llvm::createSampleProfileLoaderPass() { 474259698Sdim return new SampleProfileLoader(SampleProfileFile); 475259698Sdim} 476259698Sdim 477259698SdimFunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) { 478259698Sdim return new SampleProfileLoader(Name); 479259698Sdim} 480