HeaderMap.cpp revision 198092
1176348Smarcel//===--- HeaderMap.cpp - A file that acts like dir of symlinks ------------===//
2176348Smarcel//
3176348Smarcel//                     The LLVM Compiler Infrastructure
4176348Smarcel//
5176348Smarcel// This file is distributed under the University of Illinois Open Source
6176348Smarcel// License. See LICENSE.TXT for details.
7176348Smarcel//
8176348Smarcel//===----------------------------------------------------------------------===//
9176348Smarcel//
10176348Smarcel// This file implements the HeaderMap interface.
11176348Smarcel//
12176348Smarcel//===----------------------------------------------------------------------===//
13176348Smarcel
14176348Smarcel#include "clang/Lex/HeaderMap.h"
15176348Smarcel#include "clang/Basic/FileManager.h"
16176348Smarcel#include "llvm/ADT/OwningPtr.h"
17176348Smarcel#include "llvm/ADT/SmallString.h"
18176348Smarcel#include "llvm/Support/DataTypes.h"
19176348Smarcel#include "llvm/Support/MathExtras.h"
20176348Smarcel#include "llvm/Support/MemoryBuffer.h"
21176348Smarcel#include <cstdio>
22176348Smarcelusing namespace clang;
23176348Smarcel
24176348Smarcel//===----------------------------------------------------------------------===//
25176348Smarcel// Data Structures and Manifest Constants
26176348Smarcel//===----------------------------------------------------------------------===//
27176348Smarcel
28176348Smarcelenum {
29176348Smarcel  HMAP_HeaderMagicNumber = ('h' << 24) | ('m' << 16) | ('a' << 8) | 'p',
30176348Smarcel  HMAP_HeaderVersion = 1,
31176348Smarcel
32176348Smarcel  HMAP_EmptyBucketKey = 0
33176348Smarcel};
34176348Smarcel
35176348Smarcelnamespace clang {
36176348Smarcelstruct HMapBucket {
37176348Smarcel  uint32_t Key;          // Offset (into strings) of key.
38176348Smarcel
39176348Smarcel  uint32_t Prefix;     // Offset (into strings) of value prefix.
40176348Smarcel  uint32_t Suffix;     // Offset (into strings) of value suffix.
41176348Smarcel};
42176348Smarcel
43176348Smarcelstruct HMapHeader {
44176348Smarcel  uint32_t Magic;           // Magic word, also indicates byte order.
45176348Smarcel  uint16_t Version;         // Version number -- currently 1.
46176348Smarcel  uint16_t Reserved;        // Reserved for future use - zero for now.
47176348Smarcel  uint32_t StringsOffset;   // Offset to start of string pool.
48176348Smarcel  uint32_t NumEntries;      // Number of entries in the string table.
49176348Smarcel  uint32_t NumBuckets;      // Number of buckets (always a power of 2).
50176348Smarcel  uint32_t MaxValueLength;  // Length of longest result path (excluding nul).
51176348Smarcel  // An array of 'NumBuckets' HMapBucket objects follows this header.
52176348Smarcel  // Strings follow the buckets, at StringsOffset.
53176348Smarcel};
54176348Smarcel} // end namespace clang.
55176348Smarcel
56176348Smarcel/// HashHMapKey - This is the 'well known' hash function required by the file
57176348Smarcel/// format, used to look up keys in the hash table.  The hash table uses simple
58176348Smarcel/// linear probing based on this function.
59176348Smarcelstatic inline unsigned HashHMapKey(const char *S, const char *End) {
60176348Smarcel  unsigned Result = 0;
61176348Smarcel
62176348Smarcel  for (; S != End; S++)
63176348Smarcel    Result += tolower(*S) * 13;
64176348Smarcel  return Result;
65176348Smarcel}
66176348Smarcel
67176348Smarcel
68176348Smarcel
69176348Smarcel//===----------------------------------------------------------------------===//
70176348Smarcel// Verification and Construction
71176348Smarcel//===----------------------------------------------------------------------===//
72176348Smarcel
73176348Smarcel/// HeaderMap::Create - This attempts to load the specified file as a header
74176348Smarcel/// map.  If it doesn't look like a HeaderMap, it gives up and returns null.
75176348Smarcel/// If it looks like a HeaderMap but is obviously corrupted, it puts a reason
76176348Smarcel/// into the string error argument and returns null.
77176348Smarcelconst HeaderMap *HeaderMap::Create(const FileEntry *FE) {
78176348Smarcel  // If the file is too small to be a header map, ignore it.
79176348Smarcel  unsigned FileSize = FE->getSize();
80176348Smarcel  if (FileSize <= sizeof(HMapHeader)) return 0;
81176348Smarcel
82176348Smarcel  llvm::OwningPtr<const llvm::MemoryBuffer> FileBuffer(
83176348Smarcel    llvm::MemoryBuffer::getFile(FE->getName(), 0, FE->getSize()));
84176348Smarcel  if (FileBuffer == 0) return 0;  // Unreadable file?
85176348Smarcel  const char *FileStart = FileBuffer->getBufferStart();
86176348Smarcel
87176348Smarcel  // We know the file is at least as big as the header, check it now.
88176348Smarcel  const HMapHeader *Header = reinterpret_cast<const HMapHeader*>(FileStart);
89176348Smarcel
90176348Smarcel  // Sniff it to see if it's a headermap by checking the magic number and
91176348Smarcel  // version.
92176348Smarcel  bool NeedsByteSwap;
93176348Smarcel  if (Header->Magic == HMAP_HeaderMagicNumber &&
94176348Smarcel      Header->Version == HMAP_HeaderVersion)
95176348Smarcel    NeedsByteSwap = false;
96176348Smarcel  else if (Header->Magic == llvm::ByteSwap_32(HMAP_HeaderMagicNumber) &&
97176348Smarcel           Header->Version == llvm::ByteSwap_16(HMAP_HeaderVersion))
98176348Smarcel    NeedsByteSwap = true;  // Mixed endianness headermap.
99176348Smarcel  else
100176348Smarcel    return 0;  // Not a header map.
101176348Smarcel
102176348Smarcel  if (Header->Reserved != 0) return 0;
103176348Smarcel
104176348Smarcel  // Okay, everything looks good, create the header map.
105176348Smarcel  return new HeaderMap(FileBuffer.take(), NeedsByteSwap);
106176348Smarcel}
107176348Smarcel
108176348SmarcelHeaderMap::~HeaderMap() {
109176348Smarcel  delete FileBuffer;
110176348Smarcel}
111176348Smarcel
112176348Smarcel//===----------------------------------------------------------------------===//
113176348Smarcel//  Utility Methods
114176348Smarcel//===----------------------------------------------------------------------===//
115176348Smarcel
116176348Smarcel
117176348Smarcel/// getFileName - Return the filename of the headermap.
118176348Smarcelconst char *HeaderMap::getFileName() const {
119176348Smarcel  return FileBuffer->getBufferIdentifier();
120176348Smarcel}
121176348Smarcel
122176348Smarcelunsigned HeaderMap::getEndianAdjustedWord(unsigned X) const {
123176348Smarcel  if (!NeedsBSwap) return X;
124176348Smarcel  return llvm::ByteSwap_32(X);
125176348Smarcel}
126176348Smarcel
127176348Smarcel/// getHeader - Return a reference to the file header, in unbyte-swapped form.
128176348Smarcel/// This method cannot fail.
129176348Smarcelconst HMapHeader &HeaderMap::getHeader() const {
130176348Smarcel  // We know the file is at least as big as the header.  Return it.
131176348Smarcel  return *reinterpret_cast<const HMapHeader*>(FileBuffer->getBufferStart());
132176348Smarcel}
133176348Smarcel
134176348Smarcel/// getBucket - Return the specified hash table bucket from the header map,
135176348Smarcel/// bswap'ing its fields as appropriate.  If the bucket number is not valid,
136176348Smarcel/// this return a bucket with an empty key (0).
137176348SmarcelHMapBucket HeaderMap::getBucket(unsigned BucketNo) const {
138176348Smarcel  HMapBucket Result;
139176348Smarcel  Result.Key = HMAP_EmptyBucketKey;
140176348Smarcel
141176348Smarcel  const HMapBucket *BucketArray =
142176348Smarcel    reinterpret_cast<const HMapBucket*>(FileBuffer->getBufferStart() +
143176348Smarcel                                        sizeof(HMapHeader));
144176348Smarcel
145176348Smarcel  const HMapBucket *BucketPtr = BucketArray+BucketNo;
146176348Smarcel  if ((char*)(BucketPtr+1) > FileBuffer->getBufferEnd()) {
147176348Smarcel    Result.Prefix = 0;
148176348Smarcel    Result.Suffix = 0;
149176348Smarcel    return Result;  // Invalid buffer, corrupt hmap.
150176348Smarcel  }
151176348Smarcel
152176348Smarcel  // Otherwise, the bucket is valid.  Load the values, bswapping as needed.
153176348Smarcel  Result.Key    = getEndianAdjustedWord(BucketPtr->Key);
154176348Smarcel  Result.Prefix = getEndianAdjustedWord(BucketPtr->Prefix);
155176348Smarcel  Result.Suffix = getEndianAdjustedWord(BucketPtr->Suffix);
156176348Smarcel  return Result;
157176348Smarcel}
158176348Smarcel
159176348Smarcel/// getString - Look up the specified string in the string table.  If the string
160176348Smarcel/// index is not valid, it returns an empty string.
161176348Smarcelconst char *HeaderMap::getString(unsigned StrTabIdx) const {
162176348Smarcel  // Add the start of the string table to the idx.
163176348Smarcel  StrTabIdx += getEndianAdjustedWord(getHeader().StringsOffset);
164176348Smarcel
165176348Smarcel  // Check for invalid index.
166176348Smarcel  if (StrTabIdx >= FileBuffer->getBufferSize())
167176348Smarcel    return 0;
168176348Smarcel
169176348Smarcel  // Otherwise, we have a valid pointer into the file.  Just return it.  We know
170176348Smarcel  // that the "string" can not overrun the end of the file, because the buffer
171176348Smarcel  // is nul terminated by virtue of being a MemoryBuffer.
172176348Smarcel  return FileBuffer->getBufferStart()+StrTabIdx;
173176348Smarcel}
174176348Smarcel
175176348Smarcel/// StringsEqualWithoutCase - Compare the specified two strings for case-
176176348Smarcel/// insensitive equality, returning true if they are equal.  Both strings are
177176348Smarcel/// known to have the same length.
178176348Smarcelstatic bool StringsEqualWithoutCase(const char *S1, const char *S2,
179176348Smarcel                                    unsigned Len) {
180176348Smarcel  for (; Len; ++S1, ++S2, --Len)
181176348Smarcel    if (tolower(*S1) != tolower(*S2))
182176348Smarcel      return false;
183176348Smarcel  return true;
184176348Smarcel}
185176348Smarcel
186176348Smarcel//===----------------------------------------------------------------------===//
187176348Smarcel// The Main Drivers
188176348Smarcel//===----------------------------------------------------------------------===//
189176348Smarcel
190176348Smarcel/// dump - Print the contents of this headermap to stderr.
191176348Smarcelvoid HeaderMap::dump() const {
192176348Smarcel  const HMapHeader &Hdr = getHeader();
193176348Smarcel  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
194176348Smarcel
195176348Smarcel  fprintf(stderr, "Header Map %s:\n  %d buckets, %d entries\n",
196176348Smarcel          getFileName(), NumBuckets,
197176348Smarcel          getEndianAdjustedWord(Hdr.NumEntries));
198176348Smarcel
199176348Smarcel  for (unsigned i = 0; i != NumBuckets; ++i) {
200176348Smarcel    HMapBucket B = getBucket(i);
201176348Smarcel    if (B.Key == HMAP_EmptyBucketKey) continue;
202176348Smarcel
203176348Smarcel    const char *Key    = getString(B.Key);
204176348Smarcel    const char *Prefix = getString(B.Prefix);
205176348Smarcel    const char *Suffix = getString(B.Suffix);
206176348Smarcel    fprintf(stderr, "  %d. %s -> '%s' '%s'\n", i, Key, Prefix, Suffix);
207176348Smarcel  }
208176348Smarcel}
209176348Smarcel
210176348Smarcel/// LookupFile - Check to see if the specified relative filename is located in
211176348Smarcel/// this HeaderMap.  If so, open it and return its FileEntry.
212176348Smarcelconst FileEntry *HeaderMap::LookupFile(const char *FilenameStart,
213176348Smarcel                                       const char *FilenameEnd,
214176348Smarcel                                       FileManager &FM) const {
215176348Smarcel  const HMapHeader &Hdr = getHeader();
216176348Smarcel  unsigned NumBuckets = getEndianAdjustedWord(Hdr.NumBuckets);
217176348Smarcel
218176348Smarcel  // If the number of buckets is not a power of two, the headermap is corrupt.
219176348Smarcel  // Don't probe infinitely.
220176348Smarcel  if (NumBuckets & (NumBuckets-1))
221176348Smarcel    return 0;
222176348Smarcel
223176348Smarcel  // Linearly probe the hash table.
224176348Smarcel  for (unsigned Bucket = HashHMapKey(FilenameStart, FilenameEnd);; ++Bucket) {
225176348Smarcel    HMapBucket B = getBucket(Bucket & (NumBuckets-1));
226176348Smarcel    if (B.Key == HMAP_EmptyBucketKey) return 0; // Hash miss.
227
228    // See if the key matches.  If not, probe on.
229    const char *Key = getString(B.Key);
230    unsigned BucketKeyLen = strlen(Key);
231    if (BucketKeyLen != unsigned(FilenameEnd-FilenameStart))
232      continue;
233
234    // See if the actual strings equal.
235    if (!StringsEqualWithoutCase(FilenameStart, Key, BucketKeyLen))
236      continue;
237
238    // If so, we have a match in the hash table.  Construct the destination
239    // path.
240    llvm::SmallString<1024> DestPath;
241    DestPath += getString(B.Prefix);
242    DestPath += getString(B.Suffix);
243    return FM.getFile(DestPath.begin(), DestPath.end());
244  }
245}
246