1274958Sdim//===--- RewriteRope.cpp - Rope specialized for rewriter --------*- C++ -*-===//
2274958Sdim//
3274958Sdim//                     The LLVM Compiler Infrastructure
4274958Sdim//
5274958Sdim// This file is distributed under the University of Illinois Open Source
6274958Sdim// License. See LICENSE.TXT for details.
7274958Sdim//
8274958Sdim//===----------------------------------------------------------------------===//
9274958Sdim//
10274958Sdim//  This file implements the RewriteRope class, which is a powerful string.
11274958Sdim//
12274958Sdim//===----------------------------------------------------------------------===//
13274958Sdim
14274958Sdim#include "clang/Rewrite/Core/RewriteRope.h"
15274958Sdim#include "clang/Basic/LLVM.h"
16274958Sdim#include <algorithm>
17274958Sdimusing namespace clang;
18274958Sdim
19274958Sdim/// RewriteRope is a "strong" string class, designed to make insertions and
20274958Sdim/// deletions in the middle of the string nearly constant time (really, they are
21274958Sdim/// O(log N), but with a very low constant factor).
22274958Sdim///
23274958Sdim/// The implementation of this datastructure is a conceptual linear sequence of
24274958Sdim/// RopePiece elements.  Each RopePiece represents a view on a separately
25274958Sdim/// allocated and reference counted string.  This means that splitting a very
26274958Sdim/// long string can be done in constant time by splitting a RopePiece that
27274958Sdim/// references the whole string into two rope pieces that reference each half.
28274958Sdim/// Once split, another string can be inserted in between the two halves by
29274958Sdim/// inserting a RopePiece in between the two others.  All of this is very
30274958Sdim/// inexpensive: it takes time proportional to the number of RopePieces, not the
31274958Sdim/// length of the strings they represent.
32274958Sdim///
33274958Sdim/// While a linear sequences of RopePieces is the conceptual model, the actual
34274958Sdim/// implementation captures them in an adapted B+ Tree.  Using a B+ tree (which
35274958Sdim/// is a tree that keeps the values in the leaves and has where each node
36274958Sdim/// contains a reasonable number of pointers to children/values) allows us to
37274958Sdim/// maintain efficient operation when the RewriteRope contains a *huge* number
38274958Sdim/// of RopePieces.  The basic idea of the B+ Tree is that it allows us to find
39274958Sdim/// the RopePiece corresponding to some offset very efficiently, and it
40274958Sdim/// automatically balances itself on insertions of RopePieces (which can happen
41274958Sdim/// for both insertions and erases of string ranges).
42274958Sdim///
43274958Sdim/// The one wrinkle on the theory is that we don't attempt to keep the tree
44274958Sdim/// properly balanced when erases happen.  Erases of string data can both insert
45274958Sdim/// new RopePieces (e.g. when the middle of some other rope piece is deleted,
46274958Sdim/// which results in two rope pieces, which is just like an insert) or it can
47274958Sdim/// reduce the number of RopePieces maintained by the B+Tree.  In the case when
48274958Sdim/// the number of RopePieces is reduced, we don't attempt to maintain the
49274958Sdim/// standard 'invariant' that each node in the tree contains at least
50274958Sdim/// 'WidthFactor' children/values.  For our use cases, this doesn't seem to
51274958Sdim/// matter.
52274958Sdim///
53274958Sdim/// The implementation below is primarily implemented in terms of three classes:
54274958Sdim///   RopePieceBTreeNode - Common base class for:
55274958Sdim///
56274958Sdim///     RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece
57274958Sdim///          nodes.  This directly represents a chunk of the string with those
58274958Sdim///          RopePieces contatenated.
59274958Sdim///     RopePieceBTreeInterior - An interior node in the B+ Tree, which manages
60274958Sdim///          up to '2*WidthFactor' other nodes in the tree.
61274958Sdim
62274958Sdim
63274958Sdim//===----------------------------------------------------------------------===//
64274958Sdim// RopePieceBTreeNode Class
65274958Sdim//===----------------------------------------------------------------------===//
66274958Sdim
67274958Sdimnamespace {
68274958Sdim  /// RopePieceBTreeNode - Common base class of RopePieceBTreeLeaf and
69274958Sdim  /// RopePieceBTreeInterior.  This provides some 'virtual' dispatching methods
70274958Sdim  /// and a flag that determines which subclass the instance is.  Also
71274958Sdim  /// important, this node knows the full extend of the node, including any
72274958Sdim  /// children that it has.  This allows efficient skipping over entire subtrees
73274958Sdim  /// when looking for an offset in the BTree.
74274958Sdim  class RopePieceBTreeNode {
75274958Sdim  protected:
76274958Sdim    /// WidthFactor - This controls the number of K/V slots held in the BTree:
77274958Sdim    /// how wide it is.  Each level of the BTree is guaranteed to have at least
78274958Sdim    /// 'WidthFactor' elements in it (either ropepieces or children), (except
79274958Sdim    /// the root, which may have less) and may have at most 2*WidthFactor
80274958Sdim    /// elements.
81274958Sdim    enum { WidthFactor = 8 };
82274958Sdim
83274958Sdim    /// Size - This is the number of bytes of file this node (including any
84274958Sdim    /// potential children) covers.
85274958Sdim    unsigned Size;
86274958Sdim
87274958Sdim    /// IsLeaf - True if this is an instance of RopePieceBTreeLeaf, false if it
88274958Sdim    /// is an instance of RopePieceBTreeInterior.
89274958Sdim    bool IsLeaf;
90274958Sdim
91274958Sdim    RopePieceBTreeNode(bool isLeaf) : Size(0), IsLeaf(isLeaf) {}
92288943Sdim    ~RopePieceBTreeNode() = default;
93288943Sdim
94274958Sdim  public:
95274958Sdim    bool isLeaf() const { return IsLeaf; }
96274958Sdim    unsigned size() const { return Size; }
97274958Sdim
98274958Sdim    void Destroy();
99274958Sdim
100274958Sdim    /// split - Split the range containing the specified offset so that we are
101274958Sdim    /// guaranteed that there is a place to do an insertion at the specified
102274958Sdim    /// offset.  The offset is relative, so "0" is the start of the node.
103274958Sdim    ///
104274958Sdim    /// If there is no space in this subtree for the extra piece, the extra tree
105274958Sdim    /// node is returned and must be inserted into a parent.
106274958Sdim    RopePieceBTreeNode *split(unsigned Offset);
107274958Sdim
108274958Sdim    /// insert - Insert the specified ropepiece into this tree node at the
109274958Sdim    /// specified offset.  The offset is relative, so "0" is the start of the
110274958Sdim    /// node.
111274958Sdim    ///
112274958Sdim    /// If there is no space in this subtree for the extra piece, the extra tree
113274958Sdim    /// node is returned and must be inserted into a parent.
114274958Sdim    RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R);
115274958Sdim
116274958Sdim    /// erase - Remove NumBytes from this node at the specified offset.  We are
117274958Sdim    /// guaranteed that there is a split at Offset.
118274958Sdim    void erase(unsigned Offset, unsigned NumBytes);
119274958Sdim
120274958Sdim  };
121274958Sdim} // end anonymous namespace
122274958Sdim
123274958Sdim//===----------------------------------------------------------------------===//
124274958Sdim// RopePieceBTreeLeaf Class
125274958Sdim//===----------------------------------------------------------------------===//
126274958Sdim
127274958Sdimnamespace {
128274958Sdim  /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece
129274958Sdim  /// nodes.  This directly represents a chunk of the string with those
130274958Sdim  /// RopePieces contatenated.  Since this is a B+Tree, all values (in this case
131274958Sdim  /// instances of RopePiece) are stored in leaves like this.  To make iteration
132274958Sdim  /// over the leaves efficient, they maintain a singly linked list through the
133274958Sdim  /// NextLeaf field.  This allows the B+Tree forward iterator to be constant
134274958Sdim  /// time for all increments.
135274958Sdim  class RopePieceBTreeLeaf : public RopePieceBTreeNode {
136274958Sdim    /// NumPieces - This holds the number of rope pieces currently active in the
137274958Sdim    /// Pieces array.
138274958Sdim    unsigned char NumPieces;
139274958Sdim
140274958Sdim    /// Pieces - This tracks the file chunks currently in this leaf.
141274958Sdim    ///
142274958Sdim    RopePiece Pieces[2*WidthFactor];
143274958Sdim
144274958Sdim    /// NextLeaf - This is a pointer to the next leaf in the tree, allowing
145274958Sdim    /// efficient in-order forward iteration of the tree without traversal.
146274958Sdim    RopePieceBTreeLeaf **PrevLeaf, *NextLeaf;
147274958Sdim  public:
148274958Sdim    RopePieceBTreeLeaf() : RopePieceBTreeNode(true), NumPieces(0),
149274958Sdim                           PrevLeaf(nullptr), NextLeaf(nullptr) {}
150274958Sdim    ~RopePieceBTreeLeaf() {
151274958Sdim      if (PrevLeaf || NextLeaf)
152274958Sdim        removeFromLeafInOrder();
153274958Sdim      clear();
154274958Sdim    }
155274958Sdim
156274958Sdim    bool isFull() const { return NumPieces == 2*WidthFactor; }
157274958Sdim
158274958Sdim    /// clear - Remove all rope pieces from this leaf.
159274958Sdim    void clear() {
160274958Sdim      while (NumPieces)
161274958Sdim        Pieces[--NumPieces] = RopePiece();
162274958Sdim      Size = 0;
163274958Sdim    }
164274958Sdim
165274958Sdim    unsigned getNumPieces() const { return NumPieces; }
166274958Sdim
167274958Sdim    const RopePiece &getPiece(unsigned i) const {
168274958Sdim      assert(i < getNumPieces() && "Invalid piece ID");
169274958Sdim      return Pieces[i];
170274958Sdim    }
171274958Sdim
172274958Sdim    const RopePieceBTreeLeaf *getNextLeafInOrder() const { return NextLeaf; }
173274958Sdim    void insertAfterLeafInOrder(RopePieceBTreeLeaf *Node) {
174274958Sdim      assert(!PrevLeaf && !NextLeaf && "Already in ordering");
175274958Sdim
176274958Sdim      NextLeaf = Node->NextLeaf;
177274958Sdim      if (NextLeaf)
178274958Sdim        NextLeaf->PrevLeaf = &NextLeaf;
179274958Sdim      PrevLeaf = &Node->NextLeaf;
180274958Sdim      Node->NextLeaf = this;
181274958Sdim    }
182274958Sdim
183274958Sdim    void removeFromLeafInOrder() {
184274958Sdim      if (PrevLeaf) {
185274958Sdim        *PrevLeaf = NextLeaf;
186274958Sdim        if (NextLeaf)
187274958Sdim          NextLeaf->PrevLeaf = PrevLeaf;
188274958Sdim      } else if (NextLeaf) {
189274958Sdim        NextLeaf->PrevLeaf = nullptr;
190274958Sdim      }
191274958Sdim    }
192274958Sdim
193274958Sdim    /// FullRecomputeSizeLocally - This method recomputes the 'Size' field by
194274958Sdim    /// summing the size of all RopePieces.
195274958Sdim    void FullRecomputeSizeLocally() {
196274958Sdim      Size = 0;
197274958Sdim      for (unsigned i = 0, e = getNumPieces(); i != e; ++i)
198274958Sdim        Size += getPiece(i).size();
199274958Sdim    }
200274958Sdim
201274958Sdim    /// split - Split the range containing the specified offset so that we are
202274958Sdim    /// guaranteed that there is a place to do an insertion at the specified
203274958Sdim    /// offset.  The offset is relative, so "0" is the start of the node.
204274958Sdim    ///
205274958Sdim    /// If there is no space in this subtree for the extra piece, the extra tree
206274958Sdim    /// node is returned and must be inserted into a parent.
207274958Sdim    RopePieceBTreeNode *split(unsigned Offset);
208274958Sdim
209274958Sdim    /// insert - Insert the specified ropepiece into this tree node at the
210274958Sdim    /// specified offset.  The offset is relative, so "0" is the start of the
211274958Sdim    /// node.
212274958Sdim    ///
213274958Sdim    /// If there is no space in this subtree for the extra piece, the extra tree
214274958Sdim    /// node is returned and must be inserted into a parent.
215274958Sdim    RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R);
216274958Sdim
217274958Sdim
218274958Sdim    /// erase - Remove NumBytes from this node at the specified offset.  We are
219274958Sdim    /// guaranteed that there is a split at Offset.
220274958Sdim    void erase(unsigned Offset, unsigned NumBytes);
221274958Sdim
222274958Sdim    static inline bool classof(const RopePieceBTreeNode *N) {
223274958Sdim      return N->isLeaf();
224274958Sdim    }
225274958Sdim  };
226274958Sdim} // end anonymous namespace
227274958Sdim
228274958Sdim/// split - Split the range containing the specified offset so that we are
229274958Sdim/// guaranteed that there is a place to do an insertion at the specified
230274958Sdim/// offset.  The offset is relative, so "0" is the start of the node.
231274958Sdim///
232274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree
233274958Sdim/// node is returned and must be inserted into a parent.
234274958SdimRopePieceBTreeNode *RopePieceBTreeLeaf::split(unsigned Offset) {
235274958Sdim  // Find the insertion point.  We are guaranteed that there is a split at the
236274958Sdim  // specified offset so find it.
237274958Sdim  if (Offset == 0 || Offset == size()) {
238274958Sdim    // Fastpath for a common case.  There is already a splitpoint at the end.
239274958Sdim    return nullptr;
240274958Sdim  }
241274958Sdim
242274958Sdim  // Find the piece that this offset lands in.
243274958Sdim  unsigned PieceOffs = 0;
244274958Sdim  unsigned i = 0;
245274958Sdim  while (Offset >= PieceOffs+Pieces[i].size()) {
246274958Sdim    PieceOffs += Pieces[i].size();
247274958Sdim    ++i;
248274958Sdim  }
249274958Sdim
250274958Sdim  // If there is already a split point at the specified offset, just return
251274958Sdim  // success.
252274958Sdim  if (PieceOffs == Offset)
253274958Sdim    return nullptr;
254274958Sdim
255274958Sdim  // Otherwise, we need to split piece 'i' at Offset-PieceOffs.  Convert Offset
256274958Sdim  // to being Piece relative.
257274958Sdim  unsigned IntraPieceOffset = Offset-PieceOffs;
258274958Sdim
259274958Sdim  // We do this by shrinking the RopePiece and then doing an insert of the tail.
260274958Sdim  RopePiece Tail(Pieces[i].StrData, Pieces[i].StartOffs+IntraPieceOffset,
261274958Sdim                 Pieces[i].EndOffs);
262274958Sdim  Size -= Pieces[i].size();
263274958Sdim  Pieces[i].EndOffs = Pieces[i].StartOffs+IntraPieceOffset;
264274958Sdim  Size += Pieces[i].size();
265274958Sdim
266274958Sdim  return insert(Offset, Tail);
267274958Sdim}
268274958Sdim
269274958Sdim
270274958Sdim/// insert - Insert the specified RopePiece into this tree node at the
271274958Sdim/// specified offset.  The offset is relative, so "0" is the start of the node.
272274958Sdim///
273274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree
274274958Sdim/// node is returned and must be inserted into a parent.
275274958SdimRopePieceBTreeNode *RopePieceBTreeLeaf::insert(unsigned Offset,
276274958Sdim                                               const RopePiece &R) {
277274958Sdim  // If this node is not full, insert the piece.
278274958Sdim  if (!isFull()) {
279274958Sdim    // Find the insertion point.  We are guaranteed that there is a split at the
280274958Sdim    // specified offset so find it.
281274958Sdim    unsigned i = 0, e = getNumPieces();
282274958Sdim    if (Offset == size()) {
283274958Sdim      // Fastpath for a common case.
284274958Sdim      i = e;
285274958Sdim    } else {
286274958Sdim      unsigned SlotOffs = 0;
287274958Sdim      for (; Offset > SlotOffs; ++i)
288274958Sdim        SlotOffs += getPiece(i).size();
289274958Sdim      assert(SlotOffs == Offset && "Split didn't occur before insertion!");
290274958Sdim    }
291274958Sdim
292274958Sdim    // For an insertion into a non-full leaf node, just insert the value in
293274958Sdim    // its sorted position.  This requires moving later values over.
294274958Sdim    for (; i != e; --e)
295274958Sdim      Pieces[e] = Pieces[e-1];
296274958Sdim    Pieces[i] = R;
297274958Sdim    ++NumPieces;
298274958Sdim    Size += R.size();
299274958Sdim    return nullptr;
300274958Sdim  }
301274958Sdim
302274958Sdim  // Otherwise, if this is leaf is full, split it in two halves.  Since this
303274958Sdim  // node is full, it contains 2*WidthFactor values.  We move the first
304274958Sdim  // 'WidthFactor' values to the LHS child (which we leave in this node) and
305274958Sdim  // move the last 'WidthFactor' values into the RHS child.
306274958Sdim
307274958Sdim  // Create the new node.
308274958Sdim  RopePieceBTreeLeaf *NewNode = new RopePieceBTreeLeaf();
309274958Sdim
310274958Sdim  // Move over the last 'WidthFactor' values from here to NewNode.
311274958Sdim  std::copy(&Pieces[WidthFactor], &Pieces[2*WidthFactor],
312274958Sdim            &NewNode->Pieces[0]);
313274958Sdim  // Replace old pieces with null RopePieces to drop refcounts.
314274958Sdim  std::fill(&Pieces[WidthFactor], &Pieces[2*WidthFactor], RopePiece());
315274958Sdim
316274958Sdim  // Decrease the number of values in the two nodes.
317274958Sdim  NewNode->NumPieces = NumPieces = WidthFactor;
318274958Sdim
319274958Sdim  // Recompute the two nodes' size.
320274958Sdim  NewNode->FullRecomputeSizeLocally();
321274958Sdim  FullRecomputeSizeLocally();
322274958Sdim
323274958Sdim  // Update the list of leaves.
324274958Sdim  NewNode->insertAfterLeafInOrder(this);
325274958Sdim
326274958Sdim  // These insertions can't fail.
327274958Sdim  if (this->size() >= Offset)
328274958Sdim    this->insert(Offset, R);
329274958Sdim  else
330274958Sdim    NewNode->insert(Offset - this->size(), R);
331274958Sdim  return NewNode;
332274958Sdim}
333274958Sdim
334274958Sdim/// erase - Remove NumBytes from this node at the specified offset.  We are
335274958Sdim/// guaranteed that there is a split at Offset.
336274958Sdimvoid RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) {
337274958Sdim  // Since we are guaranteed that there is a split at Offset, we start by
338274958Sdim  // finding the Piece that starts there.
339274958Sdim  unsigned PieceOffs = 0;
340274958Sdim  unsigned i = 0;
341274958Sdim  for (; Offset > PieceOffs; ++i)
342274958Sdim    PieceOffs += getPiece(i).size();
343274958Sdim  assert(PieceOffs == Offset && "Split didn't occur before erase!");
344274958Sdim
345274958Sdim  unsigned StartPiece = i;
346274958Sdim
347274958Sdim  // Figure out how many pieces completely cover 'NumBytes'.  We want to remove
348274958Sdim  // all of them.
349274958Sdim  for (; Offset+NumBytes > PieceOffs+getPiece(i).size(); ++i)
350274958Sdim    PieceOffs += getPiece(i).size();
351274958Sdim
352274958Sdim  // If we exactly include the last one, include it in the region to delete.
353274958Sdim  if (Offset+NumBytes == PieceOffs+getPiece(i).size())
354274958Sdim    PieceOffs += getPiece(i).size(), ++i;
355274958Sdim
356274958Sdim  // If we completely cover some RopePieces, erase them now.
357274958Sdim  if (i != StartPiece) {
358274958Sdim    unsigned NumDeleted = i-StartPiece;
359274958Sdim    for (; i != getNumPieces(); ++i)
360274958Sdim      Pieces[i-NumDeleted] = Pieces[i];
361274958Sdim
362274958Sdim    // Drop references to dead rope pieces.
363274958Sdim    std::fill(&Pieces[getNumPieces()-NumDeleted], &Pieces[getNumPieces()],
364274958Sdim              RopePiece());
365274958Sdim    NumPieces -= NumDeleted;
366274958Sdim
367274958Sdim    unsigned CoverBytes = PieceOffs-Offset;
368274958Sdim    NumBytes -= CoverBytes;
369274958Sdim    Size -= CoverBytes;
370274958Sdim  }
371274958Sdim
372274958Sdim  // If we completely removed some stuff, we could be done.
373274958Sdim  if (NumBytes == 0) return;
374274958Sdim
375274958Sdim  // Okay, now might be erasing part of some Piece.  If this is the case, then
376274958Sdim  // move the start point of the piece.
377274958Sdim  assert(getPiece(StartPiece).size() > NumBytes);
378274958Sdim  Pieces[StartPiece].StartOffs += NumBytes;
379274958Sdim
380274958Sdim  // The size of this node just shrunk by NumBytes.
381274958Sdim  Size -= NumBytes;
382274958Sdim}
383274958Sdim
384274958Sdim//===----------------------------------------------------------------------===//
385274958Sdim// RopePieceBTreeInterior Class
386274958Sdim//===----------------------------------------------------------------------===//
387274958Sdim
388274958Sdimnamespace {
389274958Sdim  /// RopePieceBTreeInterior - This represents an interior node in the B+Tree,
390274958Sdim  /// which holds up to 2*WidthFactor pointers to child nodes.
391274958Sdim  class RopePieceBTreeInterior : public RopePieceBTreeNode {
392274958Sdim    /// NumChildren - This holds the number of children currently active in the
393274958Sdim    /// Children array.
394274958Sdim    unsigned char NumChildren;
395274958Sdim    RopePieceBTreeNode *Children[2*WidthFactor];
396274958Sdim  public:
397274958Sdim    RopePieceBTreeInterior() : RopePieceBTreeNode(false), NumChildren(0) {}
398274958Sdim
399274958Sdim    RopePieceBTreeInterior(RopePieceBTreeNode *LHS, RopePieceBTreeNode *RHS)
400274958Sdim    : RopePieceBTreeNode(false) {
401274958Sdim      Children[0] = LHS;
402274958Sdim      Children[1] = RHS;
403274958Sdim      NumChildren = 2;
404274958Sdim      Size = LHS->size() + RHS->size();
405274958Sdim    }
406274958Sdim
407274958Sdim    ~RopePieceBTreeInterior() {
408274958Sdim      for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
409274958Sdim        Children[i]->Destroy();
410274958Sdim    }
411274958Sdim
412274958Sdim    bool isFull() const { return NumChildren == 2*WidthFactor; }
413274958Sdim
414274958Sdim    unsigned getNumChildren() const { return NumChildren; }
415274958Sdim    const RopePieceBTreeNode *getChild(unsigned i) const {
416274958Sdim      assert(i < NumChildren && "invalid child #");
417274958Sdim      return Children[i];
418274958Sdim    }
419274958Sdim    RopePieceBTreeNode *getChild(unsigned i) {
420274958Sdim      assert(i < NumChildren && "invalid child #");
421274958Sdim      return Children[i];
422274958Sdim    }
423274958Sdim
424274958Sdim    /// FullRecomputeSizeLocally - Recompute the Size field of this node by
425274958Sdim    /// summing up the sizes of the child nodes.
426274958Sdim    void FullRecomputeSizeLocally() {
427274958Sdim      Size = 0;
428274958Sdim      for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
429274958Sdim        Size += getChild(i)->size();
430274958Sdim    }
431274958Sdim
432274958Sdim
433274958Sdim    /// split - Split the range containing the specified offset so that we are
434274958Sdim    /// guaranteed that there is a place to do an insertion at the specified
435274958Sdim    /// offset.  The offset is relative, so "0" is the start of the node.
436274958Sdim    ///
437274958Sdim    /// If there is no space in this subtree for the extra piece, the extra tree
438274958Sdim    /// node is returned and must be inserted into a parent.
439274958Sdim    RopePieceBTreeNode *split(unsigned Offset);
440274958Sdim
441274958Sdim
442274958Sdim    /// insert - Insert the specified ropepiece into this tree node at the
443274958Sdim    /// specified offset.  The offset is relative, so "0" is the start of the
444274958Sdim    /// node.
445274958Sdim    ///
446274958Sdim    /// If there is no space in this subtree for the extra piece, the extra tree
447274958Sdim    /// node is returned and must be inserted into a parent.
448274958Sdim    RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R);
449274958Sdim
450274958Sdim    /// HandleChildPiece - A child propagated an insertion result up to us.
451274958Sdim    /// Insert the new child, and/or propagate the result further up the tree.
452274958Sdim    RopePieceBTreeNode *HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS);
453274958Sdim
454274958Sdim    /// erase - Remove NumBytes from this node at the specified offset.  We are
455274958Sdim    /// guaranteed that there is a split at Offset.
456274958Sdim    void erase(unsigned Offset, unsigned NumBytes);
457274958Sdim
458274958Sdim    static inline bool classof(const RopePieceBTreeNode *N) {
459274958Sdim      return !N->isLeaf();
460274958Sdim    }
461274958Sdim  };
462274958Sdim} // end anonymous namespace
463274958Sdim
464274958Sdim/// split - Split the range containing the specified offset so that we are
465274958Sdim/// guaranteed that there is a place to do an insertion at the specified
466274958Sdim/// offset.  The offset is relative, so "0" is the start of the node.
467274958Sdim///
468274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree
469274958Sdim/// node is returned and must be inserted into a parent.
470274958SdimRopePieceBTreeNode *RopePieceBTreeInterior::split(unsigned Offset) {
471274958Sdim  // Figure out which child to split.
472274958Sdim  if (Offset == 0 || Offset == size())
473274958Sdim    return nullptr; // If we have an exact offset, we're already split.
474274958Sdim
475274958Sdim  unsigned ChildOffset = 0;
476274958Sdim  unsigned i = 0;
477274958Sdim  for (; Offset >= ChildOffset+getChild(i)->size(); ++i)
478274958Sdim    ChildOffset += getChild(i)->size();
479274958Sdim
480274958Sdim  // If already split there, we're done.
481274958Sdim  if (ChildOffset == Offset)
482274958Sdim    return nullptr;
483274958Sdim
484274958Sdim  // Otherwise, recursively split the child.
485274958Sdim  if (RopePieceBTreeNode *RHS = getChild(i)->split(Offset-ChildOffset))
486274958Sdim    return HandleChildPiece(i, RHS);
487274958Sdim  return nullptr; // Done!
488274958Sdim}
489274958Sdim
490274958Sdim/// insert - Insert the specified ropepiece into this tree node at the
491274958Sdim/// specified offset.  The offset is relative, so "0" is the start of the
492274958Sdim/// node.
493274958Sdim///
494274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree
495274958Sdim/// node is returned and must be inserted into a parent.
496274958SdimRopePieceBTreeNode *RopePieceBTreeInterior::insert(unsigned Offset,
497274958Sdim                                                   const RopePiece &R) {
498274958Sdim  // Find the insertion point.  We are guaranteed that there is a split at the
499274958Sdim  // specified offset so find it.
500274958Sdim  unsigned i = 0, e = getNumChildren();
501274958Sdim
502274958Sdim  unsigned ChildOffs = 0;
503274958Sdim  if (Offset == size()) {
504274958Sdim    // Fastpath for a common case.  Insert at end of last child.
505274958Sdim    i = e-1;
506274958Sdim    ChildOffs = size()-getChild(i)->size();
507274958Sdim  } else {
508274958Sdim    for (; Offset > ChildOffs+getChild(i)->size(); ++i)
509274958Sdim      ChildOffs += getChild(i)->size();
510274958Sdim  }
511274958Sdim
512274958Sdim  Size += R.size();
513274958Sdim
514274958Sdim  // Insert at the end of this child.
515274958Sdim  if (RopePieceBTreeNode *RHS = getChild(i)->insert(Offset-ChildOffs, R))
516274958Sdim    return HandleChildPiece(i, RHS);
517274958Sdim
518274958Sdim  return nullptr;
519274958Sdim}
520274958Sdim
521274958Sdim/// HandleChildPiece - A child propagated an insertion result up to us.
522274958Sdim/// Insert the new child, and/or propagate the result further up the tree.
523274958SdimRopePieceBTreeNode *
524274958SdimRopePieceBTreeInterior::HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS) {
525274958Sdim  // Otherwise the child propagated a subtree up to us as a new child.  See if
526274958Sdim  // we have space for it here.
527274958Sdim  if (!isFull()) {
528274958Sdim    // Insert RHS after child 'i'.
529274958Sdim    if (i + 1 != getNumChildren())
530274958Sdim      memmove(&Children[i+2], &Children[i+1],
531274958Sdim              (getNumChildren()-i-1)*sizeof(Children[0]));
532274958Sdim    Children[i+1] = RHS;
533274958Sdim    ++NumChildren;
534274958Sdim    return nullptr;
535274958Sdim  }
536274958Sdim
537274958Sdim  // Okay, this node is full.  Split it in half, moving WidthFactor children to
538274958Sdim  // a newly allocated interior node.
539274958Sdim
540274958Sdim  // Create the new node.
541274958Sdim  RopePieceBTreeInterior *NewNode = new RopePieceBTreeInterior();
542274958Sdim
543274958Sdim  // Move over the last 'WidthFactor' values from here to NewNode.
544274958Sdim  memcpy(&NewNode->Children[0], &Children[WidthFactor],
545274958Sdim         WidthFactor*sizeof(Children[0]));
546274958Sdim
547274958Sdim  // Decrease the number of values in the two nodes.
548274958Sdim  NewNode->NumChildren = NumChildren = WidthFactor;
549274958Sdim
550274958Sdim  // Finally, insert the two new children in the side the can (now) hold them.
551274958Sdim  // These insertions can't fail.
552274958Sdim  if (i < WidthFactor)
553274958Sdim    this->HandleChildPiece(i, RHS);
554274958Sdim  else
555274958Sdim    NewNode->HandleChildPiece(i-WidthFactor, RHS);
556274958Sdim
557274958Sdim  // Recompute the two nodes' size.
558274958Sdim  NewNode->FullRecomputeSizeLocally();
559274958Sdim  FullRecomputeSizeLocally();
560274958Sdim  return NewNode;
561274958Sdim}
562274958Sdim
563274958Sdim/// erase - Remove NumBytes from this node at the specified offset.  We are
564274958Sdim/// guaranteed that there is a split at Offset.
565274958Sdimvoid RopePieceBTreeInterior::erase(unsigned Offset, unsigned NumBytes) {
566274958Sdim  // This will shrink this node by NumBytes.
567274958Sdim  Size -= NumBytes;
568274958Sdim
569274958Sdim  // Find the first child that overlaps with Offset.
570274958Sdim  unsigned i = 0;
571274958Sdim  for (; Offset >= getChild(i)->size(); ++i)
572274958Sdim    Offset -= getChild(i)->size();
573274958Sdim
574274958Sdim  // Propagate the delete request into overlapping children, or completely
575274958Sdim  // delete the children as appropriate.
576274958Sdim  while (NumBytes) {
577274958Sdim    RopePieceBTreeNode *CurChild = getChild(i);
578274958Sdim
579274958Sdim    // If we are deleting something contained entirely in the child, pass on the
580274958Sdim    // request.
581274958Sdim    if (Offset+NumBytes < CurChild->size()) {
582274958Sdim      CurChild->erase(Offset, NumBytes);
583274958Sdim      return;
584274958Sdim    }
585274958Sdim
586274958Sdim    // If this deletion request starts somewhere in the middle of the child, it
587274958Sdim    // must be deleting to the end of the child.
588274958Sdim    if (Offset) {
589274958Sdim      unsigned BytesFromChild = CurChild->size()-Offset;
590274958Sdim      CurChild->erase(Offset, BytesFromChild);
591274958Sdim      NumBytes -= BytesFromChild;
592274958Sdim      // Start at the beginning of the next child.
593274958Sdim      Offset = 0;
594274958Sdim      ++i;
595274958Sdim      continue;
596274958Sdim    }
597274958Sdim
598274958Sdim    // If the deletion request completely covers the child, delete it and move
599274958Sdim    // the rest down.
600274958Sdim    NumBytes -= CurChild->size();
601274958Sdim    CurChild->Destroy();
602274958Sdim    --NumChildren;
603274958Sdim    if (i != getNumChildren())
604274958Sdim      memmove(&Children[i], &Children[i+1],
605274958Sdim              (getNumChildren()-i)*sizeof(Children[0]));
606274958Sdim  }
607274958Sdim}
608274958Sdim
609274958Sdim//===----------------------------------------------------------------------===//
610274958Sdim// RopePieceBTreeNode Implementation
611274958Sdim//===----------------------------------------------------------------------===//
612274958Sdim
613274958Sdimvoid RopePieceBTreeNode::Destroy() {
614274958Sdim  if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this))
615274958Sdim    delete Leaf;
616274958Sdim  else
617274958Sdim    delete cast<RopePieceBTreeInterior>(this);
618274958Sdim}
619274958Sdim
620274958Sdim/// split - Split the range containing the specified offset so that we are
621274958Sdim/// guaranteed that there is a place to do an insertion at the specified
622274958Sdim/// offset.  The offset is relative, so "0" is the start of the node.
623274958Sdim///
624274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree
625274958Sdim/// node is returned and must be inserted into a parent.
626274958SdimRopePieceBTreeNode *RopePieceBTreeNode::split(unsigned Offset) {
627274958Sdim  assert(Offset <= size() && "Invalid offset to split!");
628274958Sdim  if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this))
629274958Sdim    return Leaf->split(Offset);
630274958Sdim  return cast<RopePieceBTreeInterior>(this)->split(Offset);
631274958Sdim}
632274958Sdim
633274958Sdim/// insert - Insert the specified ropepiece into this tree node at the
634274958Sdim/// specified offset.  The offset is relative, so "0" is the start of the
635274958Sdim/// node.
636274958Sdim///
637274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree
638274958Sdim/// node is returned and must be inserted into a parent.
639274958SdimRopePieceBTreeNode *RopePieceBTreeNode::insert(unsigned Offset,
640274958Sdim                                               const RopePiece &R) {
641274958Sdim  assert(Offset <= size() && "Invalid offset to insert!");
642274958Sdim  if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this))
643274958Sdim    return Leaf->insert(Offset, R);
644274958Sdim  return cast<RopePieceBTreeInterior>(this)->insert(Offset, R);
645274958Sdim}
646274958Sdim
647274958Sdim/// erase - Remove NumBytes from this node at the specified offset.  We are
648274958Sdim/// guaranteed that there is a split at Offset.
649274958Sdimvoid RopePieceBTreeNode::erase(unsigned Offset, unsigned NumBytes) {
650274958Sdim  assert(Offset+NumBytes <= size() && "Invalid offset to erase!");
651274958Sdim  if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this))
652274958Sdim    return Leaf->erase(Offset, NumBytes);
653274958Sdim  return cast<RopePieceBTreeInterior>(this)->erase(Offset, NumBytes);
654274958Sdim}
655274958Sdim
656274958Sdim
657274958Sdim//===----------------------------------------------------------------------===//
658274958Sdim// RopePieceBTreeIterator Implementation
659274958Sdim//===----------------------------------------------------------------------===//
660274958Sdim
661274958Sdimstatic const RopePieceBTreeLeaf *getCN(const void *P) {
662274958Sdim  return static_cast<const RopePieceBTreeLeaf*>(P);
663274958Sdim}
664274958Sdim
665274958Sdim// begin iterator.
666274958SdimRopePieceBTreeIterator::RopePieceBTreeIterator(const void *n) {
667274958Sdim  const RopePieceBTreeNode *N = static_cast<const RopePieceBTreeNode*>(n);
668274958Sdim
669274958Sdim  // Walk down the left side of the tree until we get to a leaf.
670274958Sdim  while (const RopePieceBTreeInterior *IN = dyn_cast<RopePieceBTreeInterior>(N))
671274958Sdim    N = IN->getChild(0);
672274958Sdim
673274958Sdim  // We must have at least one leaf.
674274958Sdim  CurNode = cast<RopePieceBTreeLeaf>(N);
675274958Sdim
676274958Sdim  // If we found a leaf that happens to be empty, skip over it until we get
677274958Sdim  // to something full.
678274958Sdim  while (CurNode && getCN(CurNode)->getNumPieces() == 0)
679274958Sdim    CurNode = getCN(CurNode)->getNextLeafInOrder();
680274958Sdim
681274958Sdim  if (CurNode)
682274958Sdim    CurPiece = &getCN(CurNode)->getPiece(0);
683274958Sdim  else  // Empty tree, this is an end() iterator.
684274958Sdim    CurPiece = nullptr;
685274958Sdim  CurChar = 0;
686274958Sdim}
687274958Sdim
688274958Sdimvoid RopePieceBTreeIterator::MoveToNextPiece() {
689274958Sdim  if (CurPiece != &getCN(CurNode)->getPiece(getCN(CurNode)->getNumPieces()-1)) {
690274958Sdim    CurChar = 0;
691274958Sdim    ++CurPiece;
692274958Sdim    return;
693274958Sdim  }
694274958Sdim
695274958Sdim  // Find the next non-empty leaf node.
696274958Sdim  do
697274958Sdim    CurNode = getCN(CurNode)->getNextLeafInOrder();
698274958Sdim  while (CurNode && getCN(CurNode)->getNumPieces() == 0);
699274958Sdim
700274958Sdim  if (CurNode)
701274958Sdim    CurPiece = &getCN(CurNode)->getPiece(0);
702274958Sdim  else // Hit end().
703274958Sdim    CurPiece = nullptr;
704274958Sdim  CurChar = 0;
705274958Sdim}
706274958Sdim
707274958Sdim//===----------------------------------------------------------------------===//
708274958Sdim// RopePieceBTree Implementation
709274958Sdim//===----------------------------------------------------------------------===//
710274958Sdim
711274958Sdimstatic RopePieceBTreeNode *getRoot(void *P) {
712274958Sdim  return static_cast<RopePieceBTreeNode*>(P);
713274958Sdim}
714274958Sdim
715274958SdimRopePieceBTree::RopePieceBTree() {
716274958Sdim  Root = new RopePieceBTreeLeaf();
717274958Sdim}
718274958SdimRopePieceBTree::RopePieceBTree(const RopePieceBTree &RHS) {
719274958Sdim  assert(RHS.empty() && "Can't copy non-empty tree yet");
720274958Sdim  Root = new RopePieceBTreeLeaf();
721274958Sdim}
722274958SdimRopePieceBTree::~RopePieceBTree() {
723274958Sdim  getRoot(Root)->Destroy();
724274958Sdim}
725274958Sdim
726274958Sdimunsigned RopePieceBTree::size() const {
727274958Sdim  return getRoot(Root)->size();
728274958Sdim}
729274958Sdim
730274958Sdimvoid RopePieceBTree::clear() {
731274958Sdim  if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(getRoot(Root)))
732274958Sdim    Leaf->clear();
733274958Sdim  else {
734274958Sdim    getRoot(Root)->Destroy();
735274958Sdim    Root = new RopePieceBTreeLeaf();
736274958Sdim  }
737274958Sdim}
738274958Sdim
739274958Sdimvoid RopePieceBTree::insert(unsigned Offset, const RopePiece &R) {
740274958Sdim  // #1. Split at Offset.
741274958Sdim  if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset))
742274958Sdim    Root = new RopePieceBTreeInterior(getRoot(Root), RHS);
743274958Sdim
744274958Sdim  // #2. Do the insertion.
745274958Sdim  if (RopePieceBTreeNode *RHS = getRoot(Root)->insert(Offset, R))
746274958Sdim    Root = new RopePieceBTreeInterior(getRoot(Root), RHS);
747274958Sdim}
748274958Sdim
749274958Sdimvoid RopePieceBTree::erase(unsigned Offset, unsigned NumBytes) {
750274958Sdim  // #1. Split at Offset.
751274958Sdim  if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset))
752274958Sdim    Root = new RopePieceBTreeInterior(getRoot(Root), RHS);
753274958Sdim
754274958Sdim  // #2. Do the erasing.
755274958Sdim  getRoot(Root)->erase(Offset, NumBytes);
756274958Sdim}
757274958Sdim
758274958Sdim//===----------------------------------------------------------------------===//
759274958Sdim// RewriteRope Implementation
760274958Sdim//===----------------------------------------------------------------------===//
761274958Sdim
762274958Sdim/// MakeRopeString - This copies the specified byte range into some instance of
763274958Sdim/// RopeRefCountString, and return a RopePiece that represents it.  This uses
764274958Sdim/// the AllocBuffer object to aggregate requests for small strings into one
765274958Sdim/// allocation instead of doing tons of tiny allocations.
766274958SdimRopePiece RewriteRope::MakeRopeString(const char *Start, const char *End) {
767274958Sdim  unsigned Len = End-Start;
768274958Sdim  assert(Len && "Zero length RopePiece is invalid!");
769274958Sdim
770274958Sdim  // If we have space for this string in the current alloc buffer, use it.
771274958Sdim  if (AllocOffs+Len <= AllocChunkSize) {
772274958Sdim    memcpy(AllocBuffer->Data+AllocOffs, Start, Len);
773274958Sdim    AllocOffs += Len;
774274958Sdim    return RopePiece(AllocBuffer, AllocOffs-Len, AllocOffs);
775274958Sdim  }
776274958Sdim
777274958Sdim  // If we don't have enough room because this specific allocation is huge,
778274958Sdim  // just allocate a new rope piece for it alone.
779274958Sdim  if (Len > AllocChunkSize) {
780274958Sdim    unsigned Size = End-Start+sizeof(RopeRefCountString)-1;
781274958Sdim    RopeRefCountString *Res =
782274958Sdim      reinterpret_cast<RopeRefCountString *>(new char[Size]);
783274958Sdim    Res->RefCount = 0;
784274958Sdim    memcpy(Res->Data, Start, End-Start);
785274958Sdim    return RopePiece(Res, 0, End-Start);
786274958Sdim  }
787274958Sdim
788274958Sdim  // Otherwise, this was a small request but we just don't have space for it
789274958Sdim  // Make a new chunk and share it with later allocations.
790274958Sdim
791274958Sdim  unsigned AllocSize = offsetof(RopeRefCountString, Data) + AllocChunkSize;
792280031Sdim  RopeRefCountString *Res =
793280031Sdim      reinterpret_cast<RopeRefCountString *>(new char[AllocSize]);
794280031Sdim  Res->RefCount = 0;
795280031Sdim  memcpy(Res->Data, Start, Len);
796280031Sdim  AllocBuffer = Res;
797274958Sdim  AllocOffs = Len;
798274958Sdim
799274958Sdim  return RopePiece(AllocBuffer, 0, Len);
800274958Sdim}
801274958Sdim
802274958Sdim
803