1//===- RewriteRope.h - Rope specialized for rewriter ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//  This file defines the RewriteRope class, which is a powerful string class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
14#define LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
15
16#include "llvm/ADT/IntrusiveRefCntPtr.h"
17#include "llvm/ADT/StringRef.h"
18#include <cassert>
19#include <cstddef>
20#include <iterator>
21#include <utility>
22
23namespace clang {
24
25  //===--------------------------------------------------------------------===//
26  // RopeRefCountString Class
27  //===--------------------------------------------------------------------===//
28
29  /// RopeRefCountString - This struct is allocated with 'new char[]' from the
30  /// heap, and represents a reference counted chunk of string data.  When its
31  /// ref count drops to zero, it is delete[]'d.  This is primarily managed
32  /// through the RopePiece class below.
33  struct RopeRefCountString {
34    unsigned RefCount;
35    char Data[1];  //  Variable sized.
36
37    void Retain() { ++RefCount; }
38
39    void Release() {
40      assert(RefCount > 0 && "Reference count is already zero.");
41      if (--RefCount == 0)
42        delete [] (char*)this;
43    }
44  };
45
46  //===--------------------------------------------------------------------===//
47  // RopePiece Class
48  //===--------------------------------------------------------------------===//
49
50  /// RopePiece - This class represents a view into a RopeRefCountString object.
51  /// This allows references to string data to be efficiently chopped up and
52  /// moved around without having to push around the string data itself.
53  ///
54  /// For example, we could have a 1M RopePiece and want to insert something
55  /// into the middle of it.  To do this, we split it into two RopePiece objects
56  /// that both refer to the same underlying RopeRefCountString (just with
57  /// different offsets) which is a nice constant time operation.
58  struct RopePiece {
59    llvm::IntrusiveRefCntPtr<RopeRefCountString> StrData;
60    unsigned StartOffs = 0;
61    unsigned EndOffs = 0;
62
63    RopePiece() = default;
64    RopePiece(llvm::IntrusiveRefCntPtr<RopeRefCountString> Str, unsigned Start,
65              unsigned End)
66        : StrData(std::move(Str)), StartOffs(Start), EndOffs(End) {}
67
68    const char &operator[](unsigned Offset) const {
69      return StrData->Data[Offset+StartOffs];
70    }
71    char &operator[](unsigned Offset) {
72      return StrData->Data[Offset+StartOffs];
73    }
74
75    unsigned size() const { return EndOffs-StartOffs; }
76  };
77
78  //===--------------------------------------------------------------------===//
79  // RopePieceBTreeIterator Class
80  //===--------------------------------------------------------------------===//
81
82  /// RopePieceBTreeIterator - This class provides read-only forward iteration
83  /// over bytes that are in a RopePieceBTree.  This first iterates over bytes
84  /// in a RopePiece, then iterates over RopePiece's in a RopePieceBTreeLeaf,
85  /// then iterates over RopePieceBTreeLeaf's in a RopePieceBTree.
86  class RopePieceBTreeIterator :
87      public std::iterator<std::forward_iterator_tag, const char, ptrdiff_t> {
88    /// CurNode - The current B+Tree node that we are inspecting.
89    const void /*RopePieceBTreeLeaf*/ *CurNode = nullptr;
90
91    /// CurPiece - The current RopePiece in the B+Tree node that we're
92    /// inspecting.
93    const RopePiece *CurPiece = nullptr;
94
95    /// CurChar - The current byte in the RopePiece we are pointing to.
96    unsigned CurChar = 0;
97
98  public:
99    RopePieceBTreeIterator() = default;
100    RopePieceBTreeIterator(const void /*RopePieceBTreeNode*/ *N);
101
102    char operator*() const {
103      return (*CurPiece)[CurChar];
104    }
105
106    bool operator==(const RopePieceBTreeIterator &RHS) const {
107      return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar;
108    }
109    bool operator!=(const RopePieceBTreeIterator &RHS) const {
110      return !operator==(RHS);
111    }
112
113    RopePieceBTreeIterator& operator++() {   // Preincrement
114      if (CurChar+1 < CurPiece->size())
115        ++CurChar;
116      else
117        MoveToNextPiece();
118      return *this;
119    }
120
121    RopePieceBTreeIterator operator++(int) { // Postincrement
122      RopePieceBTreeIterator tmp = *this; ++*this; return tmp;
123    }
124
125    llvm::StringRef piece() const {
126      return llvm::StringRef(&(*CurPiece)[0], CurPiece->size());
127    }
128
129    void MoveToNextPiece();
130  };
131
132  //===--------------------------------------------------------------------===//
133  // RopePieceBTree Class
134  //===--------------------------------------------------------------------===//
135
136  class RopePieceBTree {
137    void /*RopePieceBTreeNode*/ *Root;
138
139  public:
140    RopePieceBTree();
141    RopePieceBTree(const RopePieceBTree &RHS);
142    RopePieceBTree &operator=(const RopePieceBTree &) = delete;
143    ~RopePieceBTree();
144
145    using iterator = RopePieceBTreeIterator;
146
147    iterator begin() const { return iterator(Root); }
148    iterator end() const { return iterator(); }
149    unsigned size() const;
150    unsigned empty() const { return size() == 0; }
151
152    void clear();
153
154    void insert(unsigned Offset, const RopePiece &R);
155
156    void erase(unsigned Offset, unsigned NumBytes);
157  };
158
159  //===--------------------------------------------------------------------===//
160  // RewriteRope Class
161  //===--------------------------------------------------------------------===//
162
163/// RewriteRope - A powerful string class.  This class supports extremely
164/// efficient insertions and deletions into the middle of it, even for
165/// ridiculously long strings.
166class RewriteRope {
167  RopePieceBTree Chunks;
168
169  /// We allocate space for string data out of a buffer of size AllocChunkSize.
170  /// This keeps track of how much space is left.
171  llvm::IntrusiveRefCntPtr<RopeRefCountString> AllocBuffer;
172  enum { AllocChunkSize = 4080 };
173  unsigned AllocOffs = AllocChunkSize;
174
175public:
176  RewriteRope() = default;
177  RewriteRope(const RewriteRope &RHS) : Chunks(RHS.Chunks) {}
178
179  using iterator = RopePieceBTree::iterator;
180  using const_iterator = RopePieceBTree::iterator;
181
182  iterator begin() const { return Chunks.begin(); }
183  iterator end() const { return Chunks.end(); }
184  unsigned size() const { return Chunks.size(); }
185
186  void clear() {
187    Chunks.clear();
188  }
189
190  void assign(const char *Start, const char *End) {
191    clear();
192    if (Start != End)
193      Chunks.insert(0, MakeRopeString(Start, End));
194  }
195
196  void insert(unsigned Offset, const char *Start, const char *End) {
197    assert(Offset <= size() && "Invalid position to insert!");
198    if (Start == End) return;
199    Chunks.insert(Offset, MakeRopeString(Start, End));
200  }
201
202  void erase(unsigned Offset, unsigned NumBytes) {
203    assert(Offset+NumBytes <= size() && "Invalid region to erase!");
204    if (NumBytes == 0) return;
205    Chunks.erase(Offset, NumBytes);
206  }
207
208private:
209  RopePiece MakeRopeString(const char *Start, const char *End);
210};
211
212} // namespace clang
213
214#endif // LLVM_CLANG_REWRITE_CORE_REWRITEROPE_H
215