// BreakpadRecords.cpp revision 360784
1//===-- BreakpadRecords.cpp ----------------------------------- -*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" 10#include "llvm/ADT/StringExtras.h" 11#include "llvm/ADT/StringSwitch.h" 12#include "llvm/Support/Endian.h" 13#include "llvm/Support/FormatVariadic.h" 14 15using namespace lldb_private; 16using namespace lldb_private::breakpad; 17 18namespace { 19enum class Token { 20 Unknown, 21 Module, 22 Info, 23 CodeID, 24 File, 25 Func, 26 Public, 27 Stack, 28 CFI, 29 Init, 30 Win, 31}; 32} 33 34template<typename T> 35static T stringTo(llvm::StringRef Str); 36 37template <> Token stringTo<Token>(llvm::StringRef Str) { 38 return llvm::StringSwitch<Token>(Str) 39 .Case("MODULE", Token::Module) 40 .Case("INFO", Token::Info) 41 .Case("CODE_ID", Token::CodeID) 42 .Case("FILE", Token::File) 43 .Case("FUNC", Token::Func) 44 .Case("PUBLIC", Token::Public) 45 .Case("STACK", Token::Stack) 46 .Case("CFI", Token::CFI) 47 .Case("INIT", Token::Init) 48 .Case("WIN", Token::Win) 49 .Default(Token::Unknown); 50} 51 52template <> 53llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { 54 using llvm::Triple; 55 return llvm::StringSwitch<Triple::OSType>(Str) 56 .Case("Linux", Triple::Linux) 57 .Case("mac", Triple::MacOSX) 58 .Case("windows", Triple::Win32) 59 .Default(Triple::UnknownOS); 60} 61 62template <> 63llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { 64 using llvm::Triple; 65 return llvm::StringSwitch<Triple::ArchType>(Str) 66 .Case("arm", Triple::arm) 67 .Cases("arm64", "arm64e", Triple::aarch64) 68 .Case("mips", Triple::mips) 69 .Case("ppc", Triple::ppc) 70 .Case("ppc64", Triple::ppc64) 71 .Case("s390", 
Triple::systemz) 72 .Case("sparc", Triple::sparc) 73 .Case("sparcv9", Triple::sparcv9) 74 .Case("x86", Triple::x86) 75 .Cases("x86_64", "x86_64h", Triple::x86_64) 76 .Default(Triple::UnknownArch); 77} 78 79template<typename T> 80static T consume(llvm::StringRef &Str) { 81 llvm::StringRef Token; 82 std::tie(Token, Str) = getToken(Str); 83 return stringTo<T>(Token); 84} 85 86/// Return the number of hex digits needed to encode an (POD) object of a given 87/// type. 88template <typename T> static constexpr size_t hex_digits() { 89 return 2 * sizeof(T); 90} 91 92static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { 93 struct data_t { 94 using uuid_t = uint8_t[16]; 95 uuid_t uuid; 96 llvm::support::ubig32_t age; 97 } data; 98 static_assert(sizeof(data) == 20, ""); 99 // The textual module id encoding should be between 33 and 40 bytes long, 100 // depending on the size of the age field, which is of variable length. 101 // The first three chunks of the id are encoded in big endian, so we need to 102 // byte-swap those. 103 if (str.size() <= hex_digits<data_t::uuid_t>() || 104 str.size() > hex_digits<data_t>()) 105 return UUID(); 106 if (!all_of(str, llvm::isHexDigit)) 107 return UUID(); 108 109 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>()); 110 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>()); 111 112 llvm::copy(fromHex(uuid_str), data.uuid); 113 uint32_t age; 114 bool success = to_integer(age_str, age, 16); 115 assert(success); 116 (void)success; 117 data.age = age; 118 119 // On non-windows, the age field should always be zero, so we don't include to 120 // match the native uuid format of these platforms. 121 return UUID::fromData(&data, os == llvm::Triple::Win32 ? 
sizeof(data) 122 : sizeof(data.uuid)); 123} 124 125llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) { 126 Token Tok = consume<Token>(Line); 127 switch (Tok) { 128 case Token::Module: 129 return Record::Module; 130 case Token::Info: 131 return Record::Info; 132 case Token::File: 133 return Record::File; 134 case Token::Func: 135 return Record::Func; 136 case Token::Public: 137 return Record::Public; 138 case Token::Stack: 139 Tok = consume<Token>(Line); 140 switch (Tok) { 141 case Token::CFI: 142 return Record::StackCFI; 143 case Token::Win: 144 return Record::StackWin; 145 default: 146 return llvm::None; 147 } 148 149 case Token::Unknown: 150 // Optimistically assume that any unrecognised token means this is a line 151 // record, those don't have a special keyword and start directly with a 152 // hex number. 153 return Record::Line; 154 155 case Token::CodeID: 156 case Token::CFI: 157 case Token::Init: 158 case Token::Win: 159 // These should never appear at the start of a valid record. 
160 return llvm::None; 161 } 162 llvm_unreachable("Fully covered switch above!"); 163} 164 165llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { 166 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out 167 if (consume<Token>(Line) != Token::Module) 168 return llvm::None; 169 170 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); 171 if (OS == llvm::Triple::UnknownOS) 172 return llvm::None; 173 174 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); 175 if (Arch == llvm::Triple::UnknownArch) 176 return llvm::None; 177 178 llvm::StringRef Str; 179 std::tie(Str, Line) = getToken(Line); 180 UUID ID = parseModuleId(OS, Str); 181 if (!ID) 182 return llvm::None; 183 184 return ModuleRecord(OS, Arch, std::move(ID)); 185} 186 187llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 188 const ModuleRecord &R) { 189 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " 190 << llvm::Triple::getArchTypeName(R.Arch) << " " 191 << R.ID.GetAsString(); 192} 193 194llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { 195 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] 196 if (consume<Token>(Line) != Token::Info) 197 return llvm::None; 198 199 if (consume<Token>(Line) != Token::CodeID) 200 return llvm::None; 201 202 llvm::StringRef Str; 203 std::tie(Str, Line) = getToken(Line); 204 // If we don't have any text following the code ID (e.g. on linux), we should 205 // use this as the UUID. Otherwise, we should revert back to the module ID. 
206 UUID ID; 207 if (Line.trim().empty()) { 208 if (Str.empty() || ID.SetFromStringRef(Str, Str.size() / 2) != Str.size()) 209 return llvm::None; 210 } 211 return InfoRecord(std::move(ID)); 212} 213 214llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 215 const InfoRecord &R) { 216 return OS << "INFO CODE_ID " << R.ID.GetAsString(); 217} 218 219llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { 220 // FILE number name 221 if (consume<Token>(Line) != Token::File) 222 return llvm::None; 223 224 llvm::StringRef Str; 225 size_t Number; 226 std::tie(Str, Line) = getToken(Line); 227 if (!to_integer(Str, Number)) 228 return llvm::None; 229 230 llvm::StringRef Name = Line.trim(); 231 if (Name.empty()) 232 return llvm::None; 233 234 return FileRecord(Number, Name); 235} 236 237llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 238 const FileRecord &R) { 239 return OS << "FILE " << R.Number << " " << R.Name; 240} 241 242static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, 243 lldb::addr_t &Address, lldb::addr_t *Size, 244 lldb::addr_t &ParamSize, llvm::StringRef &Name) { 245 // PUBLIC [m] address param_size name 246 // or 247 // FUNC [m] address size param_size name 248 249 Token Tok = Size ? 
Token::Func : Token::Public; 250 251 if (consume<Token>(Line) != Tok) 252 return false; 253 254 llvm::StringRef Str; 255 std::tie(Str, Line) = getToken(Line); 256 Multiple = Str == "m"; 257 258 if (Multiple) 259 std::tie(Str, Line) = getToken(Line); 260 if (!to_integer(Str, Address, 16)) 261 return false; 262 263 if (Tok == Token::Func) { 264 std::tie(Str, Line) = getToken(Line); 265 if (!to_integer(Str, *Size, 16)) 266 return false; 267 } 268 269 std::tie(Str, Line) = getToken(Line); 270 if (!to_integer(Str, ParamSize, 16)) 271 return false; 272 273 Name = Line.trim(); 274 if (Name.empty()) 275 return false; 276 277 return true; 278} 279 280llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { 281 bool Multiple; 282 lldb::addr_t Address, Size, ParamSize; 283 llvm::StringRef Name; 284 285 if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) 286 return FuncRecord(Multiple, Address, Size, ParamSize, Name); 287 288 return llvm::None; 289} 290 291bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { 292 return L.Multiple == R.Multiple && L.Address == R.Address && 293 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; 294} 295llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 296 const FuncRecord &R) { 297 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", 298 R.Multiple ? 
"m " : "", R.Address, R.Size, 299 R.ParamSize, R.Name); 300} 301 302llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { 303 lldb::addr_t Address; 304 llvm::StringRef Str; 305 std::tie(Str, Line) = getToken(Line); 306 if (!to_integer(Str, Address, 16)) 307 return llvm::None; 308 309 lldb::addr_t Size; 310 std::tie(Str, Line) = getToken(Line); 311 if (!to_integer(Str, Size, 16)) 312 return llvm::None; 313 314 uint32_t LineNum; 315 std::tie(Str, Line) = getToken(Line); 316 if (!to_integer(Str, LineNum)) 317 return llvm::None; 318 319 size_t FileNum; 320 std::tie(Str, Line) = getToken(Line); 321 if (!to_integer(Str, FileNum)) 322 return llvm::None; 323 324 return LineRecord(Address, Size, LineNum, FileNum); 325} 326 327bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { 328 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && 329 L.FileNum == R.FileNum; 330} 331llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 332 const LineRecord &R) { 333 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, 334 R.LineNum, R.FileNum); 335} 336 337llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { 338 bool Multiple; 339 lldb::addr_t Address, ParamSize; 340 llvm::StringRef Name; 341 342 if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) 343 return PublicRecord(Multiple, Address, ParamSize, Name); 344 345 return llvm::None; 346} 347 348bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { 349 return L.Multiple == R.Multiple && L.Address == R.Address && 350 L.ParamSize == R.ParamSize && L.Name == R.Name; 351} 352llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 353 const PublicRecord &R) { 354 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", 355 R.Multiple ? 
"m " : "", R.Address, R.ParamSize, 356 R.Name); 357} 358 359llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { 360 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... 361 // or 362 // STACK CFI address reg1: expr1 reg2: expr2 ... 363 // No token in exprN ends with a colon. 364 365 if (consume<Token>(Line) != Token::Stack) 366 return llvm::None; 367 if (consume<Token>(Line) != Token::CFI) 368 return llvm::None; 369 370 llvm::StringRef Str; 371 std::tie(Str, Line) = getToken(Line); 372 373 bool IsInitRecord = stringTo<Token>(Str) == Token::Init; 374 if (IsInitRecord) 375 std::tie(Str, Line) = getToken(Line); 376 377 lldb::addr_t Address; 378 if (!to_integer(Str, Address, 16)) 379 return llvm::None; 380 381 llvm::Optional<lldb::addr_t> Size; 382 if (IsInitRecord) { 383 Size.emplace(); 384 std::tie(Str, Line) = getToken(Line); 385 if (!to_integer(Str, *Size, 16)) 386 return llvm::None; 387 } 388 389 return StackCFIRecord(Address, Size, Line.trim()); 390} 391 392bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { 393 return L.Address == R.Address && L.Size == R.Size && 394 L.UnwindRules == R.UnwindRules; 395} 396 397llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 398 const StackCFIRecord &R) { 399 OS << "STACK CFI "; 400 if (R.Size) 401 OS << "INIT "; 402 OS << llvm::formatv("{0:x-} ", R.Address); 403 if (R.Size) 404 OS << llvm::formatv("{0:x-} ", *R.Size); 405 return OS << " " << R.UnwindRules; 406} 407 408llvm::Optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) { 409 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size 410 // saved_register_size local_size max_stack_size has_program_string 411 // program_string_OR_allocates_base_pointer 412 413 if (consume<Token>(Line) != Token::Stack) 414 return llvm::None; 415 if (consume<Token>(Line) != Token::Win) 416 return llvm::None; 417 418 llvm::StringRef Str; 419 uint8_t Type; 420 std::tie(Str, Line) = 
getToken(Line); 421 // Right now we only support the "FrameData" frame type. 422 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData) 423 return llvm::None; 424 425 lldb::addr_t RVA; 426 std::tie(Str, Line) = getToken(Line); 427 if (!to_integer(Str, RVA, 16)) 428 return llvm::None; 429 430 lldb::addr_t CodeSize; 431 std::tie(Str, Line) = getToken(Line); 432 if (!to_integer(Str, CodeSize, 16)) 433 return llvm::None; 434 435 // Skip fields which we aren't using right now. 436 std::tie(Str, Line) = getToken(Line); // prologue_size 437 std::tie(Str, Line) = getToken(Line); // epilogue_size 438 439 lldb::addr_t ParameterSize; 440 std::tie(Str, Line) = getToken(Line); 441 if (!to_integer(Str, ParameterSize, 16)) 442 return llvm::None; 443 444 lldb::addr_t SavedRegisterSize; 445 std::tie(Str, Line) = getToken(Line); 446 if (!to_integer(Str, SavedRegisterSize, 16)) 447 return llvm::None; 448 449 lldb::addr_t LocalSize; 450 std::tie(Str, Line) = getToken(Line); 451 if (!to_integer(Str, LocalSize, 16)) 452 return llvm::None; 453 454 std::tie(Str, Line) = getToken(Line); // max_stack_size 455 456 uint8_t HasProgramString; 457 std::tie(Str, Line) = getToken(Line); 458 if (!to_integer(Str, HasProgramString)) 459 return llvm::None; 460 // FrameData records should always have a program string. 461 if (!HasProgramString) 462 return llvm::None; 463 464 return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize, 465 LocalSize, Line.trim()); 466} 467 468bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) { 469 return L.RVA == R.RVA && L.CodeSize == R.CodeSize && 470 L.ParameterSize == R.ParameterSize && 471 L.SavedRegisterSize == R.SavedRegisterSize && 472 L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString; 473} 474 475llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, 476 const StackWinRecord &R) { 477 return OS << llvm::formatv( 478 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 
1 {5}", R.RVA, 479 R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize, 480 R.ProgramString); 481} 482 483llvm::StringRef breakpad::toString(Record::Kind K) { 484 switch (K) { 485 case Record::Module: 486 return "MODULE"; 487 case Record::Info: 488 return "INFO"; 489 case Record::File: 490 return "FILE"; 491 case Record::Func: 492 return "FUNC"; 493 case Record::Line: 494 return "LINE"; 495 case Record::Public: 496 return "PUBLIC"; 497 case Record::StackCFI: 498 return "STACK CFI"; 499 case Record::StackWin: 500 return "STACK WIN"; 501 } 502 llvm_unreachable("Unknown record kind!"); 503} 504