HeaderMap.cpp revision 198092
1176348Smarcel//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===// 2176348Smarcel// 3176348Smarcel// The LLVM Compiler Infrastructure 4176348Smarcel// 5176348Smarcel// This file is distributed under the University of Illinois Open Source 6176348Smarcel// License. See LICENSE.TXT for details. 7176348Smarcel// 8176348Smarcel//===----------------------------------------------------------------------===// 9176348Smarcel// 10176348Smarcel// This file implements the HeaderMap interface. 11176348Smarcel// 12176348Smarcel//===----------------------------------------------------------------------===// 13176348Smarcel 14176348Smarcel#include "clang/Lex/HeaderMap.h" 15176348Smarcel#include "clang/Basic/FileManager.h" 16176348Smarcel#include "llvm/ADT/OwningPtr.h" 17176348Smarcel#include "llvm/ADT/SmallString.h" 18176348Smarcel#include "llvm/Support/DataTypes.h" 19176348Smarcel#include "llvm/Support/MathExtras.h" 20176348Smarcel#include "llvm/Support/MemoryBuffer.h" 21176348Smarcel#include <cstdio> 22176348Smarcelusing namespace clang; 23176348Smarcel 24176348Smarcel//===----------------------------------------------------------------------===// 25176348Smarcel// Data Structures and Manifest Constants 26176348Smarcel//===----------------------------------------------------------------------===// 27176348Smarcel 28176348Smarcelenum { 29176348Smarcel HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p', 30176348Smarcel HMAP_HeaderVersion = 1, 31176348Smarcel 32176348Smarcel HMAP_EmptyBucketKey = 0 33176348Smarcel}; 34176348Smarcel 35176348Smarcelnamespace clang { 36176348Smarcelstruct HMapBucket { 37176348Smarcel uint32_t Key; // Offset (into strings) of key. 38176348Smarcel 39176348Smarcel uint32_t Prefix; // Offset (into strings) of value prefix. 40176348Smarcel uint32_t Suffix; // Offset (into strings) of value suffix. 41176348Smarcel}; 42176348Smarcel 43176348Smarcelstruct HMapHeader { 44176348Smarcel uint32_t Magic; // Magic word, also indicates byte order. 45176348Smarcel uint16_t Version; // Version number -- currently 1. 46176348Smarcel uint16_t Reserved; // Reserved for future use - zero for now. 47176348Smarcel uint32_t StringsOffset; // Offset to start of string pool. 48176348Smarcel uint32_t NumEntries; // Number of entries in the string table. 49176348Smarcel uint32_t NumBuckets; // Number of buckets (always a power of 2). 50176348Smarcel uint32_t MaxValueLength; // Length of longest result path (excluding nul). 51176348Smarcel // An array of 'NumBuckets' HMapBucket objects follows this header. 52176348Smarcel // Strings follow the buckets, at StringsOffset. 53176348Smarcel}; 54176348Smarcel} // end namespace clang. 55176348Smarcel 56176348Smarcel/// HashHMapKey - This is the 'well known' hash function required by the file 57176348Smarcel/// format, used to look up keys in the hash table. The hash table uses simple 58176348Smarcel/// linear probing based on this function. 59176348Smarcelstatic inline unsigned HashHMapKey(const char *S, const char *End) { 60176348Smarcel unsigned Result = 0; 61176348Smarcel 62176348Smarcel for (; S != End; S++) 63176348Smarcel Result += tolower(*S) * 13; 64176348Smarcel return Result; 65176348Smarcel} 66176348Smarcel 67176348Smarcel 68176348Smarcel 69176348Smarcel//===----------------------------------------------------------------------===// 70176348Smarcel// Verification and Construction 71176348Smarcel//===----------------------------------------------------------------------===// 72176348Smarcel 73176348Smarcel/// HeaderMap::Create - This attempts to load the specified file as a header 74176348Smarcel/// map. If it doesn't look like a HeaderMap, it gives up and returns null. 75176348Smarcel/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason 76176348Smarcel/// into the string error argument and returns null. 77176348Smarcelconst HeaderMap *HeaderMap::Create(const FileEntry *FE) { 78176348Smarcel // If the file is too small to be a header map, ignore it. 79176348Smarcel unsigned FileSize = FE->getSize(); 80176348Smarcel if (FileSize <= sizeof(HMapHeader)) return 0; 81176348Smarcel 82176348Smarcel llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer( 83176348Smarcel llvm::MemoryBuffer::getFile(FE->getName(), 0, FE->getSize())); 84176348Smarcel if (FileBuffer == 0) return 0; // Unreadable file? 85176348Smarcel const char *FileStart = FileBuffer->getBufferStart(); 86176348Smarcel 87176348Smarcel // We know the file is at least as big as the header, check it now. 88176348Smarcel const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart); 89176348Smarcel 90176348Smarcel // Sniff it to see if it's a headermap by checking the magic number and 91176348Smarcel // version. 92176348Smarcel bool NeedsByteSwap; 93176348Smarcel if (Header->Magic == HMAP_HeaderMagicNumber && 94176348Smarcel Header->Version == HMAP_HeaderVersion) 95176348Smarcel NeedsByteSwap = false; 96176348Smarcel else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) && 97176348Smarcel Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion)) 98176348Smarcel NeedsByteSwap = true; // Mixed endianness headermap. 99176348Smarcel else 100176348Smarcel return 0; // Not a header map. 101176348Smarcel 102176348Smarcel if (Header->Reserved != 0) return 0; 103176348Smarcel 104176348Smarcel // Okay, everything looks good, create the header map. 105176348Smarcel return new HeaderMap(FileBuffer.take(), NeedsByteSwap); 106176348Smarcel} 107176348Smarcel 108176348SmarcelHeaderMap::~HeaderMap() { 109176348Smarcel delete FileBuffer; 110176348Smarcel} 111176348Smarcel 112176348Smarcel//===----------------------------------------------------------------------===// 113176348Smarcel// Utility Methods 114176348Smarcel//===----------------------------------------------------------------------===// 115176348Smarcel 116176348Smarcel 117176348Smarcel/// getFileName - Return the filename of the headermap. 118176348Smarcelconst char *HeaderMap::getFileName() const { 119176348Smarcel return FileBuffer->getBufferIdentifier(); 120176348Smarcel} 121176348Smarcel 122176348Smarcelunsigned HeaderMap::getEndianAdjustedWord(unsigned X) const { 123176348Smarcel if (!NeedsBSwap) return X; 124176348Smarcel return llvm::ByteSwap_32(X); 125176348Smarcel} 126176348Smarcel 127176348Smarcel/// getHeader - Return a reference to the file header, in unbyte-swapped form. 128176348Smarcel/// This method cannot fail. 129176348Smarcelconst HMapHeader &HeaderMap::getHeader() const { 130176348Smarcel // We know the file is at least as big as the header. Return it. 131176348Smarcel return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart()); 132176348Smarcel} 133176348Smarcel 134176348Smarcel/// getBucket - Return the specified hash table bucket from the header map, 135176348Smarcel/// bswap'ing its fields as appropriate. If the bucket number is not valid, 136176348Smarcel/// this return a bucket with an empty key (0). 137176348SmarcelHMapBucket HeaderMap::getBucket(unsigned BucketNo) const { 138176348Smarcel HMapBucket Result; 139176348Smarcel Result.Key = HMAP_EmptyBucketKey; 140176348Smarcel 141176348Smarcel const HMapBucket *BucketArray = 142176348Smarcel reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() + 143176348Smarcel sizeof(HMapHeader)); 144176348Smarcel 145176348Smarcel const HMapBucket *BucketPtr = BucketArray+BucketNo; 146176348Smarcel if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) { 147176348Smarcel Result.Prefix = 0; 148176348Smarcel Result.Suffix = 0; 149176348Smarcel return Result; // Invalid buffer, corrupt hmap. 150176348Smarcel } 151176348Smarcel 152176348Smarcel // Otherwise, the bucket is valid. Load the values, bswapping as needed. 153176348Smarcel Result.Key = getEndianAdjustedWord(BucketPtr->Key); 154176348Smarcel Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix); 155176348Smarcel Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix); 156176348Smarcel return Result; 157176348Smarcel} 158176348Smarcel 159176348Smarcel/// getString - Look up the specified string in the string table. If the string 160176348Smarcel/// index is not valid, it returns an empty string. 161176348Smarcelconst char *HeaderMap::getString(unsigned StrTabIdx) const { 162176348Smarcel // Add the start of the string table to the idx. 163176348Smarcel StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset); 164176348Smarcel 165176348Smarcel // Check for invalid index. 166176348Smarcel if (StrTabIdx >= FileBuffer->getBufferSize()) 167176348Smarcel return 0; 168176348Smarcel 169176348Smarcel // Otherwise, we have a valid pointer into the file. Just return it. We know 170176348Smarcel // that the "string" can not overrun the end of the file, because the buffer 171176348Smarcel // is nul terminated by virtue of being a MemoryBuffer. 172176348Smarcel return FileBuffer->getBufferStart()+StrTabIdx; 173176348Smarcel} 174176348Smarcel 175176348Smarcel/// StringsEqualWithoutCase - Compare the specified two strings for case- 176176348Smarcel/// insensitive equality, returning true if they are equal. Both strings are 177176348Smarcel/// known to have the same length. 178176348Smarcelstatic bool StringsEqualWithoutCase(const char *S1, const char *S2, 179176348Smarcel unsigned Len) { 180176348Smarcel for (; Len; ++S1, ++S2, --Len) 181176348Smarcel if (tolower(*S1) != tolower(*S2)) 182176348Smarcel return false; 183176348Smarcel return true; 184176348Smarcel} 185176348Smarcel 186176348Smarcel//===----------------------------------------------------------------------===// 187176348Smarcel// The Main Drivers 188176348Smarcel//===----------------------------------------------------------------------===// 189176348Smarcel 190176348Smarcel/// dump - Print the contents of this headermap to stderr. 191176348Smarcelvoid HeaderMap::dump() const { 192176348Smarcel const HMapHeader &Hdr = getHeader(); 193176348Smarcel unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); 194176348Smarcel 195176348Smarcel fprintf(stderr, "Header Map %s:\n %d buckets, %d entries\n", 196176348Smarcel getFileName(), NumBuckets, 197176348Smarcel getEndianAdjustedWord(Hdr.NumEntries)); 198176348Smarcel 199176348Smarcel for (unsigned i = 0; i != NumBuckets; ++i) { 200176348Smarcel HMapBucket B = getBucket(i); 201176348Smarcel if (B.Key == HMAP_EmptyBucketKey) continue; 202176348Smarcel 203176348Smarcel const char *Key = getString(B.Key); 204176348Smarcel const char *Prefix = getString(B.Prefix); 205176348Smarcel const char *Suffix = getString(B.Suffix); 206176348Smarcel fprintf(stderr, " %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix); 207176348Smarcel } 208176348Smarcel} 209176348Smarcel 210176348Smarcel/// LookupFile - Check to see if the specified relative filename is located in 211176348Smarcel/// this HeaderMap. If so, open it and return its FileEntry. 212176348Smarcelconst FileEntry *HeaderMap::LookupFile(const char *FilenameStart, 213176348Smarcel const char *FilenameEnd, 214176348Smarcel FileManager &FM) const { 215176348Smarcel const HMapHeader &Hdr = getHeader(); 216176348Smarcel unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets); 217176348Smarcel 218176348Smarcel // If the number of buckets is not a power of two, the headermap is corrupt. 219176348Smarcel // Don't probe infinitely. 220176348Smarcel if (NumBuckets & (NumBuckets-1)) 221176348Smarcel return 0; 222176348Smarcel 223176348Smarcel // Linearly probe the hash table. 224176348Smarcel for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) { 225176348Smarcel HMapBucket B = getBucket(Bucket & (NumBuckets-1)); 226176348Smarcel if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss. 227 228 // See if the key matches. If not, probe on. 229 const char *Key = getString(B.Key); 230 unsigned BucketKeyLen = strlen(Key); 231 if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart)) 232 continue; 233 234 // See if the actual strings equal. 235 if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen)) 236 continue; 237 238 // If so, we have a match in the hash table. Construct the destination 239 // path. 240 llvm::SmallString<1024> DestPath; 241 DestPath += getString(B.Prefix); 242 DestPath += getString(B.Suffix); 243 return FM.getFile(DestPath.begin(), DestPath.end()); 244 } 245} 246