1171169Smlaier//===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
2171169Smlaier//
3171169Smlaier// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4171169Smlaier// See https://llvm.org/LICENSE.txt for license information.
5171169Smlaier// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6171169Smlaier//
7171169Smlaier//===----------------------------------------------------------------------===//
8171169Smlaier
9171169Smlaier#ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
10171169Smlaier#define LLVM_SUPPORT_BINARYSTREAMARRAY_H
11171169Smlaier
12171169Smlaier#include "llvm/ADT/ArrayRef.h"
13171169Smlaier#include "llvm/ADT/iterator.h"
14171169Smlaier#include "llvm/Support/BinaryStreamRef.h"
15171169Smlaier#include "llvm/Support/Error.h"
16171169Smlaier#include <cassert>
17171169Smlaier#include <cstdint>
18171169Smlaier
19171169Smlaier/// Lightweight arrays that are backed by an arbitrary BinaryStream.  This file
20171169Smlaier/// provides two different array implementations.
21171169Smlaier///
22171169Smlaier///     VarStreamArray - Arrays of variable length records.  The user specifies
23171169Smlaier///       an Extractor type that can extract a record from a given offset and
24171169Smlaier///       return the number of bytes consumed by the record.
25171169Smlaier///
26171169Smlaier///     FixedStreamArray - Arrays of fixed length records.  This is similar in
27171169Smlaier///       spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
28171169Smlaier///       elements of the array need not be laid out in contiguous memory.
29171169Smlaiernamespace llvm {
30171169Smlaier
31171169Smlaier/// VarStreamArrayExtractor is intended to be specialized to provide customized
32171169Smlaier/// extraction logic.  On input it receives a BinaryStreamRef pointing to the
33171169Smlaier/// beginning of the next record, but where the length of the record is not yet
34171169Smlaier/// known.  Upon completion, it should return an appropriate Error instance if
35171169Smlaier/// a record could not be extracted, or if one could be extracted it should
36171169Smlaier/// return success and set Len to the number of bytes this record occupied in
37171169Smlaier/// the underlying stream, and it should fill out the fields of the value type
38171169Smlaier/// Item appropriately to represent the current record.
39171169Smlaier///
40171169Smlaier/// You can specialize this template for your own custom value types to avoid
41171169Smlaier/// having to specify a second template argument to VarStreamArray (documented
42171169Smlaier/// below).
43171169Smlaiertemplate <typename T> struct VarStreamArrayExtractor {
44  // Method intentionally deleted.  You must provide an explicit specialization
45  // with the following method implemented.
46  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
47                   T &Item) const = delete;
48};
49
50/// VarStreamArray represents an array of variable length records backed by a
51/// stream.  This could be a contiguous sequence of bytes in memory, it could
52/// be a file on disk, or it could be a PDB stream where bytes are stored as
53/// discontiguous blocks in a file.  Usually it is desirable to treat arrays
54/// as contiguous blocks of memory, but doing so with large PDB files, for
55/// example, could mean allocating huge amounts of memory just to allow
56/// re-ordering of stream data to be contiguous before iterating over it.  By
57/// abstracting this out, we need not duplicate this memory, and we can
58/// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
59/// lazily on iteration, so there is no upfront cost associated with building
60/// or copying a VarStreamArray, no matter how large it may be.
61///
62/// You create a VarStreamArray by specifying a ValueType and an Extractor type.
63/// If you do not specify an Extractor type, you are expected to specialize
64/// VarStreamArrayExtractor<T> for your ValueType.
65///
66/// By default an Extractor is default constructed in the class, but in some
67/// cases you might find it useful for an Extractor to maintain state across
68/// extractions.  In this case you can provide your own Extractor through a
69/// secondary constructor.  The following examples show various ways of
70/// creating a VarStreamArray.
71///
72///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
73///       VarStreamArray<MyType> MyTypeArray;
74///
75///       // Will use a default-constructed MyExtractor as the extractor.
76///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
77///
78///       // Will use the specific instance of MyExtractor provided.
79///       // MyExtractor need not be default-constructible in this case.
80///       MyExtractor E(SomeContext);
81///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
82///
83
84template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
85
86template <typename ValueType,
87          typename Extractor = VarStreamArrayExtractor<ValueType>>
88class VarStreamArray {
89  friend class VarStreamArrayIterator<ValueType, Extractor>;
90
91public:
92  typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
93
94  VarStreamArray() = default;
95
96  explicit VarStreamArray(const Extractor &E) : E(E) {}
97
98  explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
99      : Stream(Stream), Skew(Skew) {}
100
101  VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
102      : Stream(Stream), E(E), Skew(Skew) {}
103
104  Iterator begin(bool *HadError = nullptr) const {
105    return Iterator(*this, E, Skew, nullptr);
106  }
107
108  bool valid() const { return Stream.valid(); }
109
110  uint32_t skew() const { return Skew; }
111  Iterator end() const { return Iterator(E); }
112
113  bool empty() const { return Stream.getLength() == 0; }
114
115  VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
116                                                 uint32_t End) const {
117    assert(Begin >= Skew);
118    // We should never cut off the beginning of the stream since it might be
119    // skewed, meaning the initial bytes are important.
120    BinaryStreamRef NewStream = Stream.slice(0, End);
121    return {NewStream, E, Begin};
122  }
123
124  /// given an offset into the array's underlying stream, return an
125  /// iterator to the record at that offset.  This is considered unsafe
126  /// since the behavior is undefined if \p Offset does not refer to the
127  /// beginning of a valid record.
128  Iterator at(uint32_t Offset) const {
129    return Iterator(*this, E, Offset, nullptr);
130  }
131
132  const Extractor &getExtractor() const { return E; }
133  Extractor &getExtractor() { return E; }
134
135  BinaryStreamRef getUnderlyingStream() const { return Stream; }
136  void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
137    Stream = NewStream;
138    Skew = NewSkew;
139  }
140
141  void drop_front() { Skew += begin()->length(); }
142
143private:
144  BinaryStreamRef Stream;
145  Extractor E;
146  uint32_t Skew = 0;
147};
148
149template <typename ValueType, typename Extractor>
150class VarStreamArrayIterator
151    : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
152                                  std::forward_iterator_tag, ValueType> {
153  typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
154  typedef VarStreamArray<ValueType, Extractor> ArrayType;
155
156public:
157  VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
158                         uint32_t Offset, bool *HadError)
159      : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
160        Array(&Array), AbsOffset(Offset), HadError(HadError) {
161    if (IterRef.getLength() == 0)
162      moveToEnd();
163    else {
164      auto EC = Extract(IterRef, ThisLen, ThisValue);
165      if (EC) {
166        consumeError(std::move(EC));
167        markError();
168      }
169    }
170  }
171
172  VarStreamArrayIterator() = default;
173  explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
174  ~VarStreamArrayIterator() = default;
175
176  bool operator==(const IterType &R) const {
177    if (Array && R.Array) {
178      // Both have a valid array, make sure they're same.
179      assert(Array == R.Array);
180      return IterRef == R.IterRef;
181    }
182
183    // Both iterators are at the end.
184    if (!Array && !R.Array)
185      return true;
186
187    // One is not at the end and one is.
188    return false;
189  }
190
191  const ValueType &operator*() const {
192    assert(Array && !HasError);
193    return ThisValue;
194  }
195
196  ValueType &operator*() {
197    assert(Array && !HasError);
198    return ThisValue;
199  }
200
201  IterType &operator+=(unsigned N) {
202    for (unsigned I = 0; I < N; ++I) {
203      // We are done with the current record, discard it so that we are
204      // positioned at the next record.
205      AbsOffset += ThisLen;
206      IterRef = IterRef.drop_front(ThisLen);
207      if (IterRef.getLength() == 0) {
208        // There is nothing after the current record, we must make this an end
209        // iterator.
210        moveToEnd();
211      } else {
212        // There is some data after the current record.
213        auto EC = Extract(IterRef, ThisLen, ThisValue);
214        if (EC) {
215          consumeError(std::move(EC));
216          markError();
217        } else if (ThisLen == 0) {
218          // An empty record? Make this an end iterator.
219          moveToEnd();
220        }
221      }
222    }
223    return *this;
224  }
225
226  uint32_t offset() const { return AbsOffset; }
227  uint32_t getRecordLength() const { return ThisLen; }
228
229private:
230  void moveToEnd() {
231    Array = nullptr;
232    ThisLen = 0;
233  }
234  void markError() {
235    moveToEnd();
236    HasError = true;
237    if (HadError != nullptr)
238      *HadError = true;
239  }
240
241  ValueType ThisValue;
242  BinaryStreamRef IterRef;
243  Extractor Extract;
244  const ArrayType *Array{nullptr};
245  uint32_t ThisLen{0};
246  uint32_t AbsOffset{0};
247  bool HasError{false};
248  bool *HadError{nullptr};
249};
250
251template <typename T> class FixedStreamArrayIterator;
252
253/// FixedStreamArray is similar to VarStreamArray, except with each record
254/// having a fixed-length.  As with VarStreamArray, there is no upfront
255/// cost associated with building or copying a FixedStreamArray, as the
256/// memory for each element is not read from the backing stream until that
257/// element is iterated.
258template <typename T> class FixedStreamArray {
259  friend class FixedStreamArrayIterator<T>;
260
261public:
262  typedef FixedStreamArrayIterator<T> Iterator;
263
264  FixedStreamArray() = default;
265  explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
266    assert(Stream.getLength() % sizeof(T) == 0);
267  }
268
269  bool operator==(const FixedStreamArray<T> &Other) const {
270    return Stream == Other.Stream;
271  }
272
273  bool operator!=(const FixedStreamArray<T> &Other) const {
274    return !(*this == Other);
275  }
276
277  FixedStreamArray(const FixedStreamArray &) = default;
278  FixedStreamArray &operator=(const FixedStreamArray &) = default;
279
280  const T &operator[](uint32_t Index) const {
281    assert(Index < size());
282    uint32_t Off = Index * sizeof(T);
283    ArrayRef<uint8_t> Data;
284    if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
285      assert(false && "Unexpected failure reading from stream");
286      // This should never happen since we asserted that the stream length was
287      // an exact multiple of the element size.
288      consumeError(std::move(EC));
289    }
290    assert(isAddrAligned(Align::Of<T>(), Data.data()));
291    return *reinterpret_cast<const T *>(Data.data());
292  }
293
294  uint32_t size() const { return Stream.getLength() / sizeof(T); }
295
296  bool empty() const { return size() == 0; }
297
298  FixedStreamArrayIterator<T> begin() const {
299    return FixedStreamArrayIterator<T>(*this, 0);
300  }
301
302  FixedStreamArrayIterator<T> end() const {
303    return FixedStreamArrayIterator<T>(*this, size());
304  }
305
306  const T &front() const { return *begin(); }
307  const T &back() const {
308    FixedStreamArrayIterator<T> I = end();
309    return *(--I);
310  }
311
312  BinaryStreamRef getUnderlyingStream() const { return Stream; }
313
314private:
315  BinaryStreamRef Stream;
316};
317
318template <typename T>
319class FixedStreamArrayIterator
320    : public iterator_facade_base<FixedStreamArrayIterator<T>,
321                                  std::random_access_iterator_tag, const T> {
322
323public:
324  FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
325      : Array(Array), Index(Index) {}
326
327  FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other)
328      : Array(Other.Array), Index(Other.Index) {}
329  FixedStreamArrayIterator<T> &
330  operator=(const FixedStreamArrayIterator<T> &Other) {
331    Array = Other.Array;
332    Index = Other.Index;
333    return *this;
334  }
335
336  const T &operator*() const { return Array[Index]; }
337  const T &operator*() { return Array[Index]; }
338
339  bool operator==(const FixedStreamArrayIterator<T> &R) const {
340    assert(Array == R.Array);
341    return (Index == R.Index) && (Array == R.Array);
342  }
343
344  FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
345    Index += N;
346    return *this;
347  }
348
349  FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
350    assert(std::ptrdiff_t(Index) >= N);
351    Index -= N;
352    return *this;
353  }
354
355  std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
356    assert(Array == R.Array);
357    assert(Index >= R.Index);
358    return Index - R.Index;
359  }
360
361  bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
362    assert(Array == RHS.Array);
363    return Index < RHS.Index;
364  }
365
366private:
367  FixedStreamArray<T> Array;
368  uint32_t Index;
369};
370
371} // namespace llvm
372
373#endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
374