xray-converter.cpp revision 344779
1336809Sdim//===- xray-converter.cpp: XRay Trace Conversion --------------------------===// 2336809Sdim// 3336809Sdim// The LLVM Compiler Infrastructure 4336809Sdim// 5336809Sdim// This file is distributed under the University of Illinois Open Source 6336809Sdim// License. See LICENSE.TXT for details. 7336809Sdim// 8336809Sdim//===----------------------------------------------------------------------===// 9336809Sdim// 10336809Sdim// Implements the trace conversion functions. 11336809Sdim// 12336809Sdim//===----------------------------------------------------------------------===// 13336809Sdim#include "xray-converter.h" 14336809Sdim 15336809Sdim#include "trie-node.h" 16336809Sdim#include "xray-registry.h" 17336809Sdim#include "llvm/DebugInfo/Symbolize/Symbolize.h" 18336809Sdim#include "llvm/Support/EndianStream.h" 19336809Sdim#include "llvm/Support/FileSystem.h" 20336809Sdim#include "llvm/Support/FormatVariadic.h" 21336809Sdim#include "llvm/Support/ScopedPrinter.h" 22336809Sdim#include "llvm/Support/YAMLTraits.h" 23336809Sdim#include "llvm/Support/raw_ostream.h" 24336809Sdim#include "llvm/XRay/InstrumentationMap.h" 25336809Sdim#include "llvm/XRay/Trace.h" 26336809Sdim#include "llvm/XRay/YAMLXRayRecord.h" 27336809Sdim 28336809Sdimusing namespace llvm; 29336809Sdimusing namespace xray; 30336809Sdim 31336809Sdim// llvm-xray convert 32336809Sdim// ---------------------------------------------------------------------------- 33336809Sdimstatic cl::SubCommand Convert("convert", "Trace Format Conversion"); 34336809Sdimstatic cl::opt<std::string> ConvertInput(cl::Positional, 35336809Sdim cl::desc("<xray log file>"), 36336809Sdim cl::Required, cl::sub(Convert)); 37336809Sdimenum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT }; 38336809Sdimstatic cl::opt<ConvertFormats> ConvertOutputFormat( 39336809Sdim "output-format", cl::desc("output format"), 40336809Sdim cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), 41336809Sdim clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"), 42336809Sdim clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event", 43336809Sdim "Output in chrome's trace event format. " 44336809Sdim "May be visualized with the Catapult trace viewer.")), 45336809Sdim cl::sub(Convert)); 46336809Sdimstatic cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), 47336809Sdim cl::desc("Alias for -output-format"), 48336809Sdim cl::sub(Convert)); 49336809Sdimstatic cl::opt<std::string> 50336809Sdim ConvertOutput("output", cl::value_desc("output file"), cl::init("-"), 51336809Sdim cl::desc("output file; use '-' for stdout"), 52336809Sdim cl::sub(Convert)); 53336809Sdimstatic cl::alias ConvertOutput2("o", cl::aliasopt(ConvertOutput), 54336809Sdim cl::desc("Alias for -output"), 55336809Sdim cl::sub(Convert)); 56336809Sdim 57336809Sdimstatic cl::opt<bool> 58336809Sdim ConvertSymbolize("symbolize", 59336809Sdim cl::desc("symbolize function ids from the input log"), 60336809Sdim cl::init(false), cl::sub(Convert)); 61336809Sdimstatic cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize), 62336809Sdim cl::desc("Alias for -symbolize"), 63336809Sdim cl::sub(Convert)); 64336809Sdim 65336809Sdimstatic cl::opt<std::string> 66336809Sdim ConvertInstrMap("instr_map", 67336809Sdim cl::desc("binary with the instrumentation map, or " 68336809Sdim "a separate instrumentation map"), 69336809Sdim cl::value_desc("binary with xray_instr_map"), 70336809Sdim cl::sub(Convert), cl::init("")); 71336809Sdimstatic cl::alias ConvertInstrMap2("m", cl::aliasopt(ConvertInstrMap), 72336809Sdim cl::desc("Alias for -instr_map"), 73336809Sdim cl::sub(Convert)); 74336809Sdimstatic cl::opt<bool> ConvertSortInput( 75336809Sdim "sort", 76336809Sdim cl::desc("determines whether to sort input log records by timestamp"), 77336809Sdim cl::sub(Convert), cl::init(true)); 78336809Sdimstatic cl::alias ConvertSortInput2("s", cl::aliasopt(ConvertSortInput), 79336809Sdim cl::desc("Alias for -sort"), 80336809Sdim cl::sub(Convert)); 81336809Sdim 82336809Sdimusing llvm::yaml::Output; 83336809Sdim 84336809Sdimvoid TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) { 85336809Sdim YAMLXRayTrace Trace; 86336809Sdim const auto &FH = Records.getFileHeader(); 87336809Sdim Trace.Header = {FH.Version, FH.Type, FH.ConstantTSC, FH.NonstopTSC, 88336809Sdim FH.CycleFrequency}; 89336809Sdim Trace.Records.reserve(Records.size()); 90336809Sdim for (const auto &R : Records) { 91336809Sdim Trace.Records.push_back({R.RecordType, R.CPU, R.Type, R.FuncId, 92336809Sdim Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId) 93336809Sdim : llvm::to_string(R.FuncId), 94344779Sdim R.TSC, R.TId, R.PId, R.CallArgs, R.Data}); 95336809Sdim } 96336809Sdim Output Out(OS, nullptr, 0); 97344779Sdim Out.setWriteDefaultValues(false); 98336809Sdim Out << Trace; 99336809Sdim} 100336809Sdim 101336809Sdimvoid TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) { 102336809Sdim // First write out the file header, in the correct endian-appropriate format 103336809Sdim // (XRay assumes currently little endian). 104336809Sdim support::endian::Writer Writer(OS, support::endianness::little); 105336809Sdim const auto &FH = Records.getFileHeader(); 106336809Sdim Writer.write(FH.Version); 107336809Sdim Writer.write(FH.Type); 108336809Sdim uint32_t Bitfield{0}; 109336809Sdim if (FH.ConstantTSC) 110336809Sdim Bitfield |= 1uL; 111336809Sdim if (FH.NonstopTSC) 112336809Sdim Bitfield |= 1uL << 1; 113336809Sdim Writer.write(Bitfield); 114336809Sdim Writer.write(FH.CycleFrequency); 115336809Sdim 116336809Sdim // There's 16 bytes of padding at the end of the file header. 117336809Sdim static constexpr uint32_t Padding4B = 0; 118336809Sdim Writer.write(Padding4B); 119336809Sdim Writer.write(Padding4B); 120336809Sdim Writer.write(Padding4B); 121336809Sdim Writer.write(Padding4B); 122336809Sdim 123336809Sdim // Then write out the rest of the records, still in an endian-appropriate 124336809Sdim // format. 125336809Sdim for (const auto &R : Records) { 126336809Sdim switch (R.Type) { 127336809Sdim case RecordTypes::ENTER: 128336809Sdim case RecordTypes::ENTER_ARG: 129344779Sdim Writer.write(R.RecordType); 130344779Sdim Writer.write(static_cast<uint8_t>(R.CPU)); 131336809Sdim Writer.write(uint8_t{0}); 132336809Sdim break; 133336809Sdim case RecordTypes::EXIT: 134344779Sdim Writer.write(R.RecordType); 135344779Sdim Writer.write(static_cast<uint8_t>(R.CPU)); 136336809Sdim Writer.write(uint8_t{1}); 137336809Sdim break; 138336809Sdim case RecordTypes::TAIL_EXIT: 139344779Sdim Writer.write(R.RecordType); 140344779Sdim Writer.write(static_cast<uint8_t>(R.CPU)); 141336809Sdim Writer.write(uint8_t{2}); 142336809Sdim break; 143344779Sdim case RecordTypes::CUSTOM_EVENT: 144344779Sdim case RecordTypes::TYPED_EVENT: 145344779Sdim // Skip custom and typed event records for v1 logs. 146344779Sdim continue; 147336809Sdim } 148336809Sdim Writer.write(R.FuncId); 149336809Sdim Writer.write(R.TSC); 150336809Sdim Writer.write(R.TId); 151336809Sdim 152336809Sdim if (FH.Version >= 3) 153336809Sdim Writer.write(R.PId); 154336809Sdim else 155336809Sdim Writer.write(Padding4B); 156336809Sdim 157336809Sdim Writer.write(Padding4B); 158336809Sdim Writer.write(Padding4B); 159336809Sdim } 160336809Sdim} 161336809Sdim 162336809Sdimnamespace { 163336809Sdim 164336809Sdim// A structure that allows building a dictionary of stack ids for the Chrome 165336809Sdim// trace event format. 166336809Sdimstruct StackIdData { 167336809Sdim // Each Stack of function calls has a unique ID. 168336809Sdim unsigned id; 169336809Sdim 170336809Sdim // Bookkeeping so that IDs can be maintained uniquely across threads. 171336809Sdim // Traversal keeps sibling pointers to other threads stacks. This is helpful 172336809Sdim // to determine when a thread encounters a new stack and should assign a new 173336809Sdim // unique ID. 174336809Sdim SmallVector<TrieNode<StackIdData> *, 4> siblings; 175336809Sdim}; 176336809Sdim 177336809Sdimusing StackTrieNode = TrieNode<StackIdData>; 178336809Sdim 179336809Sdim// A helper function to find the sibling nodes for an encountered function in a 180336809Sdim// thread of execution. Relies on the invariant that each time a new node is 181336809Sdim// traversed in a thread, sibling bidirectional pointers are maintained. 182336809SdimSmallVector<StackTrieNode *, 4> 183336809SdimfindSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, 184336809Sdim const DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> 185336809Sdim &StackRootsByThreadId) { 186336809Sdim 187336809Sdim SmallVector<StackTrieNode *, 4> Siblings{}; 188336809Sdim 189336809Sdim if (parent == nullptr) { 190336809Sdim for (auto map_iter : StackRootsByThreadId) { 191336809Sdim // Only look for siblings in other threads. 192336809Sdim if (map_iter.first != TId) 193336809Sdim for (auto node_iter : map_iter.second) { 194336809Sdim if (node_iter->FuncId == FnId) 195336809Sdim Siblings.push_back(node_iter); 196336809Sdim } 197336809Sdim } 198336809Sdim return Siblings; 199336809Sdim } 200336809Sdim 201336809Sdim for (auto *ParentSibling : parent->ExtraData.siblings) 202336809Sdim for (auto node_iter : ParentSibling->Callees) 203336809Sdim if (node_iter->FuncId == FnId) 204336809Sdim Siblings.push_back(node_iter); 205336809Sdim 206336809Sdim return Siblings; 207336809Sdim} 208336809Sdim 209336809Sdim// Given a function being invoked in a thread with id TId, finds and returns the 210336809Sdim// StackTrie representing the function call stack. If no node exists, creates 211336809Sdim// the node. Assigns unique IDs to stacks newly encountered among all threads 212336809Sdim// and keeps sibling links up to when creating new nodes. 213336809SdimStackTrieNode *findOrCreateStackNode( 214336809Sdim StackTrieNode *Parent, int32_t FuncId, uint32_t TId, 215336809Sdim DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> &StackRootsByThreadId, 216336809Sdim DenseMap<unsigned, StackTrieNode *> &StacksByStackId, unsigned *id_counter, 217336809Sdim std::forward_list<StackTrieNode> &NodeStore) { 218336809Sdim SmallVector<StackTrieNode *, 4> &ParentCallees = 219336809Sdim Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees; 220336809Sdim auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) { 221336809Sdim return FuncId == ParentCallee->FuncId; 222336809Sdim }); 223336809Sdim if (match != ParentCallees.end()) 224336809Sdim return *match; 225336809Sdim 226336809Sdim SmallVector<StackTrieNode *, 4> siblings = 227336809Sdim findSiblings(Parent, FuncId, TId, StackRootsByThreadId); 228336809Sdim if (siblings.empty()) { 229336809Sdim NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}}); 230336809Sdim StackTrieNode *CurrentStack = &NodeStore.front(); 231336809Sdim StacksByStackId[*id_counter - 1] = CurrentStack; 232336809Sdim ParentCallees.push_back(CurrentStack); 233336809Sdim return CurrentStack; 234336809Sdim } 235336809Sdim unsigned stack_id = siblings[0]->ExtraData.id; 236336809Sdim NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}}); 237336809Sdim StackTrieNode *CurrentStack = &NodeStore.front(); 238336809Sdim for (auto *sibling : CurrentStack->ExtraData.siblings) 239336809Sdim sibling->ExtraData.siblings.push_back(CurrentStack); 240336809Sdim ParentCallees.push_back(CurrentStack); 241336809Sdim return CurrentStack; 242336809Sdim} 243336809Sdim 244336809Sdimvoid writeTraceViewerRecord(uint16_t Version, raw_ostream &OS, int32_t FuncId, 245336809Sdim uint32_t TId, uint32_t PId, bool Symbolize, 246336809Sdim const FuncIdConversionHelper &FuncIdHelper, 247336809Sdim double EventTimestampUs, 248336809Sdim const StackTrieNode &StackCursor, 249336809Sdim StringRef FunctionPhenotype) { 250336809Sdim OS << " "; 251336809Sdim if (Version >= 3) { 252336809Sdim OS << llvm::formatv( 253336809Sdim R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "{3}", )" 254336809Sdim R"("ts" : "{4:f4}", "sf" : "{5}" })", 255336809Sdim (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) 256336809Sdim : llvm::to_string(FuncId)), 257336809Sdim FunctionPhenotype, TId, PId, EventTimestampUs, 258336809Sdim StackCursor.ExtraData.id); 259336809Sdim } else { 260336809Sdim OS << llvm::formatv( 261336809Sdim R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" 262336809Sdim R"("ts" : "{3:f3}", "sf" : "{4}" })", 263336809Sdim (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) 264336809Sdim : llvm::to_string(FuncId)), 265336809Sdim FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); 266336809Sdim } 267336809Sdim} 268336809Sdim 269336809Sdim} // namespace 270336809Sdim 271336809Sdimvoid TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, 272336809Sdim raw_ostream &OS) { 273336809Sdim const auto &FH = Records.getFileHeader(); 274336809Sdim auto Version = FH.Version; 275336809Sdim auto CycleFreq = FH.CycleFrequency; 276336809Sdim 277336809Sdim unsigned id_counter = 0; 278336809Sdim 279336809Sdim OS << "{\n \"traceEvents\": ["; 280336809Sdim DenseMap<uint32_t, StackTrieNode *> StackCursorByThreadId{}; 281336809Sdim DenseMap<uint32_t, SmallVector<StackTrieNode *, 4>> StackRootsByThreadId{}; 282336809Sdim DenseMap<unsigned, StackTrieNode *> StacksByStackId{}; 283336809Sdim std::forward_list<StackTrieNode> NodeStore{}; 284336809Sdim int loop_count = 0; 285336809Sdim for (const auto &R : Records) { 286336809Sdim if (loop_count++ == 0) 287336809Sdim OS << "\n"; 288336809Sdim else 289336809Sdim OS << ",\n"; 290336809Sdim 291336809Sdim // Chrome trace event format always wants data in micros. 292336809Sdim // CyclesPerMicro = CycleHertz / 10^6 293336809Sdim // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp 294336809Sdim // Could lose some precision here by converting the TSC to a double to 295336809Sdim // multiply by the period in micros. 52 bit mantissa is a good start though. 296336809Sdim // TODO: Make feature request to Chrome Trace viewer to accept ticks and a 297336809Sdim // frequency or do some more involved calculation to avoid dangers of 298336809Sdim // conversion. 299336809Sdim double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC); 300336809Sdim StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId]; 301336809Sdim switch (R.Type) { 302344779Sdim case RecordTypes::CUSTOM_EVENT: 303344779Sdim case RecordTypes::TYPED_EVENT: 304344779Sdim // TODO: Support typed and custom event rendering on Chrome Trace Viewer. 305344779Sdim break; 306336809Sdim case RecordTypes::ENTER: 307336809Sdim case RecordTypes::ENTER_ARG: 308336809Sdim StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId, 309336809Sdim StackRootsByThreadId, StacksByStackId, 310336809Sdim &id_counter, NodeStore); 311336809Sdim // Each record is represented as a json dictionary with function name, 312336809Sdim // type of B for begin or E for end, thread id, process id, 313336809Sdim // timestamp in microseconds, and a stack frame id. The ids are logged 314336809Sdim // in an id dictionary after the events. 315336809Sdim writeTraceViewerRecord(Version, OS, R.FuncId, R.TId, R.PId, Symbolize, 316336809Sdim FuncIdHelper, EventTimestampUs, *StackCursor, "B"); 317336809Sdim break; 318336809Sdim case RecordTypes::EXIT: 319336809Sdim case RecordTypes::TAIL_EXIT: 320336809Sdim // No entries to record end for. 321336809Sdim if (StackCursor == nullptr) 322336809Sdim break; 323336809Sdim // Should we emit an END record anyway or account this condition? 324336809Sdim // (And/Or in loop termination below) 325336809Sdim StackTrieNode *PreviousCursor = nullptr; 326336809Sdim do { 327336809Sdim if (PreviousCursor != nullptr) { 328336809Sdim OS << ",\n"; 329336809Sdim } 330336809Sdim writeTraceViewerRecord(Version, OS, StackCursor->FuncId, R.TId, R.PId, 331336809Sdim Symbolize, FuncIdHelper, EventTimestampUs, 332336809Sdim *StackCursor, "E"); 333336809Sdim PreviousCursor = StackCursor; 334336809Sdim StackCursor = StackCursor->Parent; 335336809Sdim } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); 336336809Sdim break; 337336809Sdim } 338336809Sdim } 339336809Sdim OS << "\n ],\n"; // Close the Trace Events array. 340336809Sdim OS << " " 341336809Sdim << "\"displayTimeUnit\": \"ns\",\n"; 342336809Sdim 343336809Sdim // The stackFrames dictionary substantially reduces size of the output file by 344336809Sdim // avoiding repeating the entire call stack of function names for each entry. 345336809Sdim OS << R"( "stackFrames": {)"; 346336809Sdim int stack_frame_count = 0; 347336809Sdim for (auto map_iter : StacksByStackId) { 348336809Sdim if (stack_frame_count++ == 0) 349336809Sdim OS << "\n"; 350336809Sdim else 351336809Sdim OS << ",\n"; 352336809Sdim OS << " "; 353336809Sdim OS << llvm::formatv( 354336809Sdim R"("{0}" : { "name" : "{1}")", map_iter.first, 355336809Sdim (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) 356336809Sdim : llvm::to_string(map_iter.second->FuncId))); 357336809Sdim if (map_iter.second->Parent != nullptr) 358336809Sdim OS << llvm::formatv(R"(, "parent": "{0}")", 359336809Sdim map_iter.second->Parent->ExtraData.id); 360336809Sdim OS << " }"; 361336809Sdim } 362336809Sdim OS << "\n }\n"; // Close the stack frames map. 363336809Sdim OS << "}\n"; // Close the JSON entry. 364336809Sdim} 365336809Sdim 366336809Sdimnamespace llvm { 367336809Sdimnamespace xray { 368336809Sdim 369336809Sdimstatic CommandRegistration Unused(&Convert, []() -> Error { 370336809Sdim // FIXME: Support conversion to BINARY when upgrading XRay trace versions. 371336809Sdim InstrumentationMap Map; 372336809Sdim if (!ConvertInstrMap.empty()) { 373336809Sdim auto InstrumentationMapOrError = loadInstrumentationMap(ConvertInstrMap); 374336809Sdim if (!InstrumentationMapOrError) 375336809Sdim return joinErrors(make_error<StringError>( 376336809Sdim Twine("Cannot open instrumentation map '") + 377336809Sdim ConvertInstrMap + "'", 378336809Sdim std::make_error_code(std::errc::invalid_argument)), 379336809Sdim InstrumentationMapOrError.takeError()); 380336809Sdim Map = std::move(*InstrumentationMapOrError); 381336809Sdim } 382336809Sdim 383336809Sdim const auto &FunctionAddresses = Map.getFunctionAddresses(); 384336809Sdim symbolize::LLVMSymbolizer::Options Opts( 385336809Sdim symbolize::FunctionNameKind::LinkageName, true, true, false, ""); 386336809Sdim symbolize::LLVMSymbolizer Symbolizer(Opts); 387336809Sdim llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer, 388336809Sdim FunctionAddresses); 389336809Sdim llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize); 390336809Sdim std::error_code EC; 391336809Sdim raw_fd_ostream OS(ConvertOutput, EC, 392336809Sdim ConvertOutputFormat == ConvertFormats::BINARY 393336809Sdim ? sys::fs::OpenFlags::F_None 394336809Sdim : sys::fs::OpenFlags::F_Text); 395336809Sdim if (EC) 396336809Sdim return make_error<StringError>( 397336809Sdim Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC); 398336809Sdim 399336809Sdim auto TraceOrErr = loadTraceFile(ConvertInput, ConvertSortInput); 400336809Sdim if (!TraceOrErr) 401336809Sdim return joinErrors( 402336809Sdim make_error<StringError>( 403336809Sdim Twine("Failed loading input file '") + ConvertInput + "'.", 404336809Sdim std::make_error_code(std::errc::executable_format_error)), 405336809Sdim TraceOrErr.takeError()); 406336809Sdim 407336809Sdim auto &T = *TraceOrErr; 408336809Sdim switch (ConvertOutputFormat) { 409336809Sdim case ConvertFormats::YAML: 410336809Sdim TC.exportAsYAML(T, OS); 411336809Sdim break; 412336809Sdim case ConvertFormats::BINARY: 413336809Sdim TC.exportAsRAWv1(T, OS); 414336809Sdim break; 415336809Sdim case ConvertFormats::CHROME_TRACE_EVENT: 416336809Sdim TC.exportAsChromeTraceEventFormat(T, OS); 417336809Sdim break; 418336809Sdim } 419336809Sdim return Error::success(); 420336809Sdim}); 421336809Sdim 422336809Sdim} // namespace xray 423336809Sdim} // namespace llvm 424