1//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Builds up standard unix archive files (.a) containing LLVM bitcode. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ArchiveInternals.h" 15#include "llvm/Bitcode/ReaderWriter.h" 16#include "llvm/Support/MemoryBuffer.h" 17#include "llvm/Module.h" 18#include <cstdlib> 19#include <memory> 20using namespace llvm; 21 22/// Read a variable-bit-rate encoded unsigned integer 23static inline unsigned readInteger(const char*&At, const char*End) { 24 unsigned Shift = 0; 25 unsigned Result = 0; 26 27 do { 28 if (At == End) 29 return Result; 30 Result |= (unsigned)((*At++) & 0x7F) << Shift; 31 Shift += 7; 32 } while (At[-1] & 0x80); 33 return Result; 34} 35 36// Completely parse the Archive's symbol table and populate symTab member var. 37bool 38Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { 39 const char* At = (const char*) data; 40 const char* End = At + size; 41 while (At < End) { 42 unsigned offset = readInteger(At, End); 43 if (At == End) { 44 if (error) 45 *error = "Ran out of data reading vbr_uint for symtab offset!"; 46 return false; 47 } 48 unsigned length = readInteger(At, End); 49 if (At == End) { 50 if (error) 51 *error = "Ran out of data reading vbr_uint for symtab length!"; 52 return false; 53 } 54 if (At + length > End) { 55 if (error) 56 *error = "Malformed symbol table: length not consistent with size"; 57 return false; 58 } 59 // we don't care if it can't be inserted (duplicate entry) 60 symTab.insert(std::make_pair(std::string(At, length), offset)); 61 At += length; 62 } 63 symTabSize = size; 64 return true; 65} 66 67// This member parses an ArchiveMemberHeader that is presumed to be pointed to 68// by At. The At pointer is updated to the byte just after the header, which 69// can be variable in size. 70ArchiveMember* 71Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) 72{ 73 if (At + sizeof(ArchiveMemberHeader) >= End) { 74 if (error) 75 *error = "Unexpected end of file"; 76 return 0; 77 } 78 79 // Cast archive member header 80 ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At; 81 At += sizeof(ArchiveMemberHeader); 82 83 // Extract the size and determine if the file is 84 // compressed or not (negative length). 85 int flags = 0; 86 int MemberSize = atoi(Hdr->size); 87 if (MemberSize < 0) { 88 flags |= ArchiveMember::CompressedFlag; 89 MemberSize = -MemberSize; 90 } 91 92 // Check the size of the member for sanity 93 if (At + MemberSize > End) { 94 if (error) 95 *error = "invalid member length in archive file"; 96 return 0; 97 } 98 99 // Check the member signature 100 if (!Hdr->checkSignature()) { 101 if (error) 102 *error = "invalid file member signature"; 103 return 0; 104 } 105 106 // Convert and check the member name 107 // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol 108 // table. The special name "//" and 14 blanks is for a string table, used 109 // for long file names. This library doesn't generate either of those but 110 // it will accept them. If the name starts with #1/ and the remainder is 111 // digits, then those digits specify the length of the name that is 112 // stored immediately following the header. The special name 113 // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode. 114 // Anything else is a regular, short filename that is terminated with 115 // a '/' and blanks. 116 117 std::string pathname; 118 switch (Hdr->name[0]) { 119 case '#': 120 if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { 121 if (isdigit(Hdr->name[3])) { 122 unsigned len = atoi(&Hdr->name[3]); 123 pathname.assign(At, len); 124 At += len; 125 MemberSize -= len; 126 flags |= ArchiveMember::HasLongFilenameFlag; 127 } else { 128 if (error) 129 *error = "invalid long filename"; 130 return 0; 131 } 132 } else if (Hdr->name[1] == '_' && 133 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { 134 // The member is using a long file name (>15 chars) format. 135 // This format is standard for 4.4BSD and Mac OSX operating 136 // systems. LLVM uses it similarly. In this format, the 137 // remainder of the name field (after #1/) specifies the 138 // length of the file name which occupy the first bytes of 139 // the member's data. The pathname already has the #1/ stripped. 140 pathname.assign(ARFILE_LLVM_SYMTAB_NAME); 141 flags |= ArchiveMember::LLVMSymbolTableFlag; 142 } 143 break; 144 case '/': 145 if (Hdr->name[1]== '/') { 146 if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { 147 pathname.assign(ARFILE_STRTAB_NAME); 148 flags |= ArchiveMember::StringTableFlag; 149 } else { 150 if (error) 151 *error = "invalid string table name"; 152 return 0; 153 } 154 } else if (Hdr->name[1] == ' ') { 155 if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { 156 pathname.assign(ARFILE_SVR4_SYMTAB_NAME); 157 flags |= ArchiveMember::SVR4SymbolTableFlag; 158 } else { 159 if (error) 160 *error = "invalid SVR4 symbol table name"; 161 return 0; 162 } 163 } else if (isdigit(Hdr->name[1])) { 164 unsigned index = atoi(&Hdr->name[1]); 165 if (index < strtab.length()) { 166 const char* namep = strtab.c_str() + index; 167 const char* endp = strtab.c_str() + strtab.length(); 168 const char* p = namep; 169 const char* last_p = p; 170 while (p < endp) { 171 if (*p == '\n' && *last_p == '/') { 172 pathname.assign(namep, last_p - namep); 173 flags |= ArchiveMember::HasLongFilenameFlag; 174 break; 175 } 176 last_p = p; 177 p++; 178 } 179 if (p >= endp) { 180 if (error) 181 *error = "missing name termiantor in string table"; 182 return 0; 183 } 184 } else { 185 if (error) 186 *error = "name index beyond string table"; 187 return 0; 188 } 189 } 190 break; 191 case '_': 192 if (Hdr->name[1] == '_' && 193 (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { 194 pathname.assign(ARFILE_BSD4_SYMTAB_NAME); 195 flags |= ArchiveMember::BSD4SymbolTableFlag; 196 break; 197 } 198 /* FALL THROUGH */ 199 200 default: 201 char* slash = (char*) memchr(Hdr->name, '/', 16); 202 if (slash == 0) 203 slash = Hdr->name + 16; 204 pathname.assign(Hdr->name, slash - Hdr->name); 205 break; 206 } 207 208 // Determine if this is a bitcode file 209 switch (sys::IdentifyFileType(At, 4)) { 210 case sys::Bitcode_FileType: 211 flags |= ArchiveMember::BitcodeFlag; 212 break; 213 default: 214 flags &= ~ArchiveMember::BitcodeFlag; 215 break; 216 } 217 218 // Instantiate the ArchiveMember to be filled 219 ArchiveMember* member = new ArchiveMember(this); 220 221 // Fill in fields of the ArchiveMember 222 member->parent = this; 223 member->path.set(pathname); 224 member->info.fileSize = MemberSize; 225 member->info.modTime.fromEpochTime(atoi(Hdr->date)); 226 unsigned int mode; 227 sscanf(Hdr->mode, "%o", &mode); 228 member->info.mode = mode; 229 member->info.user = atoi(Hdr->uid); 230 member->info.group = atoi(Hdr->gid); 231 member->flags = flags; 232 member->data = At; 233 234 return member; 235} 236 237bool 238Archive::checkSignature(std::string* error) { 239 // Check the magic string at file's header 240 if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { 241 if (error) 242 *error = "invalid signature for an archive file"; 243 return false; 244 } 245 return true; 246} 247 248// This function loads the entire archive and fully populates its ilist with 249// the members of the archive file. This is typically used in preparation for 250// editing the contents of the archive. 251bool 252Archive::loadArchive(std::string* error) { 253 254 // Set up parsing 255 members.clear(); 256 symTab.clear(); 257 const char *At = base; 258 const char *End = mapfile->getBufferEnd(); 259 260 if (!checkSignature(error)) 261 return false; 262 263 At += 8; // Skip the magic string. 264 265 bool seenSymbolTable = false; 266 bool foundFirstFile = false; 267 while (At < End) { 268 // parse the member header 269 const char* Save = At; 270 ArchiveMember* mbr = parseMemberHeader(At, End, error); 271 if (!mbr) 272 return false; 273 274 // check if this is the foreign symbol table 275 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 276 // We just save this but don't do anything special 277 // with it. It doesn't count as the "first file". 278 if (foreignST) { 279 // What? Multiple foreign symbol tables? Just chuck it 280 // and retain the last one found. 281 delete foreignST; 282 } 283 foreignST = mbr; 284 At += mbr->getSize(); 285 if ((intptr_t(At) & 1) == 1) 286 At++; 287 } else if (mbr->isStringTable()) { 288 // Simply suck the entire string table into a string 289 // variable. This will be used to get the names of the 290 // members that use the "/ddd" format for their names 291 // (SVR4 style long names). 292 strtab.assign(At, mbr->getSize()); 293 At += mbr->getSize(); 294 if ((intptr_t(At) & 1) == 1) 295 At++; 296 delete mbr; 297 } else if (mbr->isLLVMSymbolTable()) { 298 // This is the LLVM symbol table for the archive. If we've seen it 299 // already, its an error. Otherwise, parse the symbol table and move on. 300 if (seenSymbolTable) { 301 if (error) 302 *error = "invalid archive: multiple symbol tables"; 303 return false; 304 } 305 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) 306 return false; 307 seenSymbolTable = true; 308 At += mbr->getSize(); 309 if ((intptr_t(At) & 1) == 1) 310 At++; 311 delete mbr; // We don't need this member in the list of members. 312 } else { 313 // This is just a regular file. If its the first one, save its offset. 314 // Otherwise just push it on the list and move on to the next file. 315 if (!foundFirstFile) { 316 firstFileOffset = Save - base; 317 foundFirstFile = true; 318 } 319 members.push_back(mbr); 320 At += mbr->getSize(); 321 if ((intptr_t(At) & 1) == 1) 322 At++; 323 } 324 } 325 return true; 326} 327 328// Open and completely load the archive file. 329Archive*
|
330Archive::OpenAndLoad(const sys::Path& file, std::string* ErrorMessage)
331{
332 std::auto_ptr result ( new Archive(file));
|
330Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C, 331 std::string* ErrorMessage) { 332 std::auto_ptr<Archive> result ( new Archive(file, C)); |
333 if (result->mapToMemory(ErrorMessage)) 334 return 0; 335 if (!result->loadArchive(ErrorMessage)) 336 return 0; 337 return result.release(); 338} 339 340// Get all the bitcode modules from the archive 341bool
|
342Archive::getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage) {
|
342Archive::getAllModules(std::vector& Modules, 343 std::string* ErrMessage) { |
344 345 for (iterator I=begin(), E=end(); I != E; ++I) { 346 if (I->isBitcode()) { 347 std::string FullMemberName = archPath.toString() + 348 "(" + I->getPath().toString() + ")"; 349 MemoryBuffer *Buffer = 350 MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); 351 memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize()); 352
|
352 Module *M = ParseBitcodeFile(Buffer, ErrMessage);
|
353 Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); |
354 delete Buffer; 355 if (!M) 356 return true; 357 358 Modules.push_back(M); 359 } 360 } 361 return false; 362} 363 364// Load just the symbol table from the archive file 365bool 366Archive::loadSymbolTable(std::string* ErrorMsg) { 367 368 // Set up parsing 369 members.clear(); 370 symTab.clear(); 371 const char *At = base; 372 const char *End = mapfile->getBufferEnd(); 373 374 // Make sure we're dealing with an archive 375 if (!checkSignature(ErrorMsg)) 376 return false; 377 378 At += 8; // Skip signature 379 380 // Parse the first file member header 381 const char* FirstFile = At; 382 ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); 383 if (!mbr) 384 return false; 385 386 if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { 387 // Skip the foreign symbol table, we don't do anything with it 388 At += mbr->getSize(); 389 if ((intptr_t(At) & 1) == 1) 390 At++; 391 delete mbr; 392 393 // Read the next one 394 FirstFile = At; 395 mbr = parseMemberHeader(At, End, ErrorMsg); 396 if (!mbr) { 397 delete mbr; 398 return false; 399 } 400 } 401 402 if (mbr->isStringTable()) { 403 // Process the string table entry 404 strtab.assign((const char*)mbr->getData(), mbr->getSize()); 405 At += mbr->getSize(); 406 if ((intptr_t(At) & 1) == 1) 407 At++; 408 delete mbr; 409 // Get the next one 410 FirstFile = At; 411 mbr = parseMemberHeader(At, End, ErrorMsg); 412 if (!mbr) { 413 delete mbr; 414 return false; 415 } 416 } 417 418 // See if its the symbol table 419 if (mbr->isLLVMSymbolTable()) { 420 if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { 421 delete mbr; 422 return false; 423 } 424 425 At += mbr->getSize(); 426 if ((intptr_t(At) & 1) == 1) 427 At++; 428 delete mbr; 429 // Can't be any more symtab headers so just advance 430 FirstFile = At; 431 } else { 432 // There's no symbol table in the file. We have to rebuild it from scratch 433 // because the intent of this method is to get the symbol table loaded so 434 // it can be searched efficiently. 435 // Add the member to the members list 436 members.push_back(mbr); 437 } 438 439 firstFileOffset = FirstFile - base; 440 return true; 441} 442 443// Open the archive and load just the symbol tables
|
443Archive*
444Archive::OpenAndLoadSymbols(const sys::Path& file, std::string* ErrorMessage) {
445 std::auto_ptr<Archive> result ( new Archive(file) );
|
444Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, 445 LLVMContext& C, 446 std::string* ErrorMessage) { 447 std::auto_ptr<Archive> result ( new Archive(file, C) ); |
448 if (result->mapToMemory(ErrorMessage)) 449 return 0; 450 if (!result->loadSymbolTable(ErrorMessage)) 451 return 0; 452 return result.release(); 453} 454 455// Look up one symbol in the symbol table and return a ModuleProvider for the 456// module that defines that symbol. 457ModuleProvider* 458Archive::findModuleDefiningSymbol(const std::string& symbol, 459 std::string* ErrMsg) { 460 SymTabType::iterator SI = symTab.find(symbol); 461 if (SI == symTab.end()) 462 return 0; 463 464 // The symbol table was previously constructed assuming that the members were 465 // written without the symbol table header. Because VBR encoding is used, the 466 // values could not be adjusted to account for the offset of the symbol table 467 // because that could affect the size of the symbol table due to VBR encoding. 468 // We now have to account for this by adjusting the offset by the size of the 469 // symbol table and its header. 470 unsigned fileOffset = 471 SI->second + // offset in symbol-table-less file 472 firstFileOffset; // add offset to first "real" file in archive 473 474 // See if the module is already loaded 475 ModuleMap::iterator MI = modules.find(fileOffset); 476 if (MI != modules.end()) 477 return MI->second.first; 478 479 // Module hasn't been loaded yet, we need to load it 480 const char* modptr = base + fileOffset; 481 ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(), 482 ErrMsg); 483 if (!mbr) 484 return 0; 485 486 // Now, load the bitcode module to get the ModuleProvider 487 std::string FullMemberName = archPath.toString() + "(" + 488 mbr->getPath().toString() + ")"; 489 MemoryBuffer *Buffer =MemoryBuffer::getNewMemBuffer(mbr->getSize(), 490 FullMemberName.c_str()); 491 memcpy((char*)Buffer->getBufferStart(), mbr->getData(), mbr->getSize()); 492
|
491 ModuleProvider *mp = getBitcodeModuleProvider(Buffer, ErrMsg);
|
493 ModuleProvider *mp = getBitcodeModuleProvider(Buffer, Context, ErrMsg); |
494 if (!mp) 495 return 0; 496 497 modules.insert(std::make_pair(fileOffset, std::make_pair(mp, mbr))); 498 499 return mp; 500} 501 502// Look up multiple symbols in the symbol table and return a set of 503// ModuleProviders that define those symbols. 504bool 505Archive::findModulesDefiningSymbols(std::set<std::string>& symbols, 506 std::set<ModuleProvider*>& result, 507 std::string* error) { 508 if (!mapfile || !base) { 509 if (error) 510 *error = "Empty archive invalid for finding modules defining symbols"; 511 return false; 512 } 513 514 if (symTab.empty()) { 515 // We don't have a symbol table, so we must build it now but lets also 516 // make sure that we populate the modules table as we do this to ensure 517 // that we don't load them twice when findModuleDefiningSymbol is called 518 // below. 519 520 // Get a pointer to the first file 521 const char* At = base + firstFileOffset; 522 const char* End = mapfile->getBufferEnd(); 523 524 while ( At < End) { 525 // Compute the offset to be put in the symbol table 526 unsigned offset = At - base - firstFileOffset; 527 528 // Parse the file's header 529 ArchiveMember* mbr = parseMemberHeader(At, End, error); 530 if (!mbr) 531 return false; 532 533 // If it contains symbols 534 if (mbr->isBitcode()) { 535 // Get the symbols 536 std::vector<std::string> symbols; 537 std::string FullMemberName = archPath.toString() + "(" + 538 mbr->getPath().toString() + ")"; 539 ModuleProvider* MP = 540 GetBitcodeSymbols((const unsigned char*)At, mbr->getSize(),
|
539 FullMemberName, symbols, error);
|
541 FullMemberName, Context, symbols, error); |
542 543 if (MP) { 544 // Insert the module's symbols into the symbol table 545 for (std::vector<std::string>::iterator I = symbols.begin(), 546 E=symbols.end(); I != E; ++I ) { 547 symTab.insert(std::make_pair(*I, offset)); 548 } 549 // Insert the ModuleProvider and the ArchiveMember into the table of 550 // modules. 551 modules.insert(std::make_pair(offset, std::make_pair(MP, mbr))); 552 } else { 553 if (error) 554 *error = "Can't parse bitcode member: " + 555 mbr->getPath().toString() + ": " + *error; 556 delete mbr; 557 return false; 558 } 559 } 560 561 // Go to the next file location 562 At += mbr->getSize(); 563 if ((intptr_t(At) & 1) == 1) 564 At++; 565 } 566 } 567 568 // At this point we have a valid symbol table (one way or another) so we 569 // just use it to quickly find the symbols requested. 570 571 for (std::set<std::string>::iterator I=symbols.begin(), 572 E=symbols.end(); I != E;) { 573 // See if this symbol exists 574 ModuleProvider* mp = findModuleDefiningSymbol(*I,error); 575 if (mp) { 576 // The symbol exists, insert the ModuleProvider into our result, 577 // duplicates wil be ignored 578 result.insert(mp); 579 580 // Remove the symbol now that its been resolved, being careful to 581 // post-increment the iterator. 582 symbols.erase(I++); 583 } else { 584 ++I; 585 } 586 } 587 return true; 588} 589 590bool Archive::isBitcodeArchive() { 591 // Make sure the symTab has been loaded. In most cases this should have been 592 // done when the archive was constructed, but still, this is just in case. 593 if (symTab.empty()) 594 if (!loadSymbolTable(0)) 595 return false; 596 597 // Now that we know it's been loaded, return true 598 // if it has a size 599 if (symTab.size()) return true; 600 601 // We still can't be sure it isn't a bitcode archive 602 if (!loadArchive(0)) 603 return false; 604 605 std::vector<Module *> Modules; 606 std::string ErrorMessage; 607 608 // Scan the archive, trying to load a bitcode member. We only load one to 609 // see if this works. 610 for (iterator I = begin(), E = end(); I != E; ++I) { 611 if (!I->isBitcode()) 612 continue; 613 614 std::string FullMemberName = 615 archPath.toString() + "(" + I->getPath().toString() + ")"; 616 617 MemoryBuffer *Buffer = 618 MemoryBuffer::getNewMemBuffer(I->getSize(), FullMemberName.c_str()); 619 memcpy((char*)Buffer->getBufferStart(), I->getData(), I->getSize());
|
618 Module *M = ParseBitcodeFile(Buffer);
|
620 Module *M = ParseBitcodeFile(Buffer, Context); |
621 delete Buffer; 622 if (!M) 623 return false; // Couldn't parse bitcode, not a bitcode archive. 624 delete M; 625 return true; 626 } 627 628 return false; 629}
|