1274955Ssvnmir//===-- StringTableBuilder.cpp - String table building utility ------------===//
2274955Ssvnmir//
3274955Ssvnmir//                     The LLVM Compiler Infrastructure
4274955Ssvnmir//
5274955Ssvnmir// This file is distributed under the University of Illinois Open Source
6274955Ssvnmir// License. See LICENSE.TXT for details.
7274955Ssvnmir//
8274955Ssvnmir//===----------------------------------------------------------------------===//
9274955Ssvnmir
10274955Ssvnmir#include "llvm/MC/StringTableBuilder.h"
11296417Sdim#include "llvm/ADT/STLExtras.h"
12280031Sdim#include "llvm/Support/COFF.h"
13280031Sdim#include "llvm/Support/Endian.h"
14274955Ssvnmir
15296417Sdim#include <vector>
16296417Sdim
17274955Ssvnmirusing namespace llvm;
18274955Ssvnmir
19296417SdimStringTableBuilder::StringTableBuilder(Kind K) : K(K) {}
20296417Sdim
21296417Sdimtypedef std::pair<StringRef, size_t> StringPair;
22296417Sdim
23296417Sdim// Returns the character at Pos from end of a string.
24296417Sdimstatic int charTailAt(StringPair *P, size_t Pos) {
25296417Sdim  StringRef S = P->first;
26296417Sdim  if (Pos >= S.size())
27296417Sdim    return -1;
28296417Sdim  return (unsigned char)S[S.size() - Pos - 1];
29296417Sdim}
30296417Sdim
31296417Sdim// Three-way radix quicksort. This is much faster than std::sort with strcmp
32296417Sdim// because it does not compare characters that we already know the same.
33296417Sdimstatic void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
34296417Sdimtailcall:
35296417Sdim  if (End - Begin <= 1)
36296417Sdim    return;
37296417Sdim
38296417Sdim  // Partition items. Items in [Begin, P) are greater than the pivot,
39296417Sdim  // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
40296417Sdim  int Pivot = charTailAt(*Begin, Pos);
41296417Sdim  StringPair **P = Begin;
42296417Sdim  StringPair **Q = End;
43296417Sdim  for (StringPair **R = Begin + 1; R < Q;) {
44296417Sdim    int C = charTailAt(*R, Pos);
45296417Sdim    if (C > Pivot)
46296417Sdim      std::swap(*P++, *R++);
47296417Sdim    else if (C < Pivot)
48296417Sdim      std::swap(*--Q, *R);
49296417Sdim    else
50296417Sdim      R++;
51274955Ssvnmir  }
52296417Sdim
53296417Sdim  multikey_qsort(Begin, P, Pos);
54296417Sdim  multikey_qsort(Q, End, Pos);
55296417Sdim  if (Pivot != -1) {
56296417Sdim    // qsort(P, Q, Pos + 1), but with tail call optimization.
57296417Sdim    Begin = P;
58296417Sdim    End = Q;
59296417Sdim    ++Pos;
60296417Sdim    goto tailcall;
61296417Sdim  }
62274955Ssvnmir}
63274955Ssvnmir
64296417Sdimvoid StringTableBuilder::finalize() {
65296417Sdim  std::vector<std::pair<StringRef, size_t> *> Strings;
66280031Sdim  Strings.reserve(StringIndexMap.size());
67296417Sdim  for (std::pair<StringRef, size_t> &P : StringIndexMap)
68296417Sdim    Strings.push_back(&P);
69280031Sdim
70296417Sdim  if (!Strings.empty())
71296417Sdim    multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
72274955Ssvnmir
73296417Sdim  switch (K) {
74296417Sdim  case RAW:
75296417Sdim    break;
76280031Sdim  case ELF:
77280031Sdim  case MachO:
78280031Sdim    // Start the table with a NUL byte.
79280031Sdim    StringTable += '\x00';
80280031Sdim    break;
81280031Sdim  case WinCOFF:
82280031Sdim    // Make room to write the table size later.
83280031Sdim    StringTable.append(4, '\x00');
84280031Sdim    break;
85280031Sdim  }
86274955Ssvnmir
87274955Ssvnmir  StringRef Previous;
88296417Sdim  for (std::pair<StringRef, size_t> *P : Strings) {
89296417Sdim    StringRef S = P->first;
90296417Sdim    if (K == WinCOFF)
91296417Sdim      assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
92280031Sdim
93296417Sdim    if (Previous.endswith(S)) {
94296417Sdim      P->second = StringTable.size() - S.size() - (K != RAW);
95274955Ssvnmir      continue;
96274955Ssvnmir    }
97274955Ssvnmir
98296417Sdim    P->second = StringTable.size();
99296417Sdim    StringTable += S;
100296417Sdim    if (K != RAW)
101296417Sdim      StringTable += '\x00';
102296417Sdim    Previous = S;
103274955Ssvnmir  }
104280031Sdim
105296417Sdim  switch (K) {
106296417Sdim  case RAW:
107280031Sdim  case ELF:
108280031Sdim    break;
109280031Sdim  case MachO:
110280031Sdim    // Pad to multiple of 4.
111280031Sdim    while (StringTable.size() % 4)
112280031Sdim      StringTable += '\x00';
113280031Sdim    break;
114280031Sdim  case WinCOFF:
115280031Sdim    // Write the table size in the first word.
116280031Sdim    assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
117296417Sdim    uint32_t Size = static_cast<uint32_t>(StringTable.size());
118280031Sdim    support::endian::write<uint32_t, support::little, support::unaligned>(
119296417Sdim        StringTable.data(), Size);
120280031Sdim    break;
121280031Sdim  }
122296417Sdim
123296417Sdim  Size = StringTable.size();
124274955Ssvnmir}
125280031Sdim
126280031Sdimvoid StringTableBuilder::clear() {
127280031Sdim  StringTable.clear();
128280031Sdim  StringIndexMap.clear();
129280031Sdim}
130296417Sdim
131296417Sdimsize_t StringTableBuilder::getOffset(StringRef S) const {
132296417Sdim  assert(isFinalized());
133296417Sdim  auto I = StringIndexMap.find(S);
134296417Sdim  assert(I != StringIndexMap.end() && "String is not in table!");
135296417Sdim  return I->second;
136296417Sdim}
137296417Sdim
138296417Sdimsize_t StringTableBuilder::add(StringRef S) {
139296417Sdim  assert(!isFinalized());
140296417Sdim  auto P = StringIndexMap.insert(std::make_pair(S, Size));
141296417Sdim  if (P.second)
142296417Sdim    Size += S.size() + (K != RAW);
143296417Sdim  return P.first->second;
144296417Sdim}
145