1//===- StringTableBuilder.cpp - String table building utility -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/MC/StringTableBuilder.h"
10#include "llvm/ADT/CachedHashString.h"
11#include "llvm/ADT/SmallString.h"
12#include "llvm/ADT/StringRef.h"
13#include "llvm/BinaryFormat/COFF.h"
14#include "llvm/Support/Endian.h"
15#include "llvm/Support/MathExtras.h"
16#include "llvm/Support/raw_ostream.h"
17#include <cassert>
18#include <cstddef>
19#include <cstdint>
20#include <cstring>
21#include <utility>
22#include <vector>
23
24using namespace llvm;
25
26StringTableBuilder::~StringTableBuilder() = default;
27
28void StringTableBuilder::initSize() {
29  // Account for leading bytes in table so that offsets returned from add are
30  // correct.
31  switch (K) {
32  case RAW:
33  case DWARF:
34    Size = 0;
35    break;
36  case MachO:
37  case ELF:
38    // Start the table with a NUL byte.
39    Size = 1;
40    break;
41  case XCOFF:
42  case WinCOFF:
43    // Make room to write the table size later.
44    Size = 4;
45    break;
46  }
47}
48
49StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
50    : K(K), Alignment(Alignment) {
51  initSize();
52}
53
54void StringTableBuilder::write(raw_ostream &OS) const {
55  assert(isFinalized());
56  SmallString<0> Data;
57  Data.resize(getSize());
58  write((uint8_t *)Data.data());
59  OS << Data;
60}
61
62using StringPair = std::pair<CachedHashStringRef, size_t>;
63
64void StringTableBuilder::write(uint8_t *Buf) const {
65  assert(isFinalized());
66  for (const StringPair &P : StringIndexMap) {
67    StringRef Data = P.first.val();
68    if (!Data.empty())
69      memcpy(Buf + P.second, Data.data(), Data.size());
70  }
71  // The COFF formats store the size of the string table in the first 4 bytes.
72  // For Windows, the format is little-endian; for AIX, it is big-endian.
73  if (K == WinCOFF)
74    support::endian::write32le(Buf, Size);
75  else if (K == XCOFF)
76    support::endian::write32be(Buf, Size);
77}
78
79// Returns the character at Pos from end of a string.
80static int charTailAt(StringPair *P, size_t Pos) {
81  StringRef S = P->first.val();
82  if (Pos >= S.size())
83    return -1;
84  return (unsigned char)S[S.size() - Pos - 1];
85}
86
87// Three-way radix quicksort. This is much faster than std::sort with strcmp
88// because it does not compare characters that we already know the same.
89static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
90tailcall:
91  if (Vec.size() <= 1)
92    return;
93
94  // Partition items so that items in [0, I) are greater than the pivot,
95  // [I, J) are the same as the pivot, and [J, Vec.size()) are less than
96  // the pivot.
97  int Pivot = charTailAt(Vec[0], Pos);
98  size_t I = 0;
99  size_t J = Vec.size();
100  for (size_t K = 1; K < J;) {
101    int C = charTailAt(Vec[K], Pos);
102    if (C > Pivot)
103      std::swap(Vec[I++], Vec[K++]);
104    else if (C < Pivot)
105      std::swap(Vec[--J], Vec[K]);
106    else
107      K++;
108  }
109
110  multikeySort(Vec.slice(0, I), Pos);
111  multikeySort(Vec.slice(J), Pos);
112
113  // multikeySort(Vec.slice(I, J - I), Pos + 1), but with
114  // tail call optimization.
115  if (Pivot != -1) {
116    Vec = Vec.slice(I, J - I);
117    ++Pos;
118    goto tailcall;
119  }
120}
121
122void StringTableBuilder::finalize() {
123  assert(K != DWARF);
124  finalizeStringTable(/*Optimize=*/true);
125}
126
127void StringTableBuilder::finalizeInOrder() {
128  finalizeStringTable(/*Optimize=*/false);
129}
130
131void StringTableBuilder::finalizeStringTable(bool Optimize) {
132  Finalized = true;
133
134  if (Optimize) {
135    std::vector<StringPair *> Strings;
136    Strings.reserve(StringIndexMap.size());
137    for (StringPair &P : StringIndexMap)
138      Strings.push_back(&P);
139
140    multikeySort(Strings, 0);
141    initSize();
142
143    StringRef Previous;
144    for (StringPair *P : Strings) {
145      StringRef S = P->first.val();
146      if (Previous.endswith(S)) {
147        size_t Pos = Size - S.size() - (K != RAW);
148        if (!(Pos & (Alignment - 1))) {
149          P->second = Pos;
150          continue;
151        }
152      }
153
154      Size = alignTo(Size, Alignment);
155      P->second = Size;
156
157      Size += S.size();
158      if (K != RAW)
159        ++Size;
160      Previous = S;
161    }
162  }
163
164  if (K == MachO)
165    Size = alignTo(Size, 4); // Pad to multiple of 4.
166
167  // The first byte in an ELF string table must be null, according to the ELF
168  // specification. In 'initSize()' we reserved the first byte to hold null for
169  // this purpose and here we actually add the string to allow 'getOffset()' to
170  // be called on an empty string.
171  if (K == ELF)
172    StringIndexMap[CachedHashStringRef("")] = 0;
173}
174
175void StringTableBuilder::clear() {
176  Finalized = false;
177  StringIndexMap.clear();
178}
179
180size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
181  assert(isFinalized());
182  auto I = StringIndexMap.find(S);
183  assert(I != StringIndexMap.end() && "String is not in table!");
184  return I->second;
185}
186
187size_t StringTableBuilder::add(CachedHashStringRef S) {
188  if (K == WinCOFF)
189    assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
190
191  assert(!isFinalized());
192  auto P = StringIndexMap.insert(std::make_pair(S, 0));
193  if (P.second) {
194    size_t Start = alignTo(Size, Alignment);
195    P.first->second = Start;
196    Size = Start + S.size() + (K != RAW);
197  }
198  return P.first->second;
199}
200