1274958Sdim//===--- RewriteRope.cpp - Rope specialized for rewriter --------*- C++ -*-===// 2274958Sdim// 3274958Sdim// The LLVM Compiler Infrastructure 4274958Sdim// 5274958Sdim// This file is distributed under the University of Illinois Open Source 6274958Sdim// License. See LICENSE.TXT for details. 7274958Sdim// 8274958Sdim//===----------------------------------------------------------------------===// 9274958Sdim// 10274958Sdim// This file implements the RewriteRope class, which is a powerful string. 11274958Sdim// 12274958Sdim//===----------------------------------------------------------------------===// 13274958Sdim 14274958Sdim#include "clang/Rewrite/Core/RewriteRope.h" 15274958Sdim#include "clang/Basic/LLVM.h" 16274958Sdim#include <algorithm> 17274958Sdimusing namespace clang; 18274958Sdim 19274958Sdim/// RewriteRope is a "strong" string class, designed to make insertions and 20274958Sdim/// deletions in the middle of the string nearly constant time (really, they are 21274958Sdim/// O(log N), but with a very low constant factor). 22274958Sdim/// 23274958Sdim/// The implementation of this datastructure is a conceptual linear sequence of 24274958Sdim/// RopePiece elements. Each RopePiece represents a view on a separately 25274958Sdim/// allocated and reference counted string. This means that splitting a very 26274958Sdim/// long string can be done in constant time by splitting a RopePiece that 27274958Sdim/// references the whole string into two rope pieces that reference each half. 28274958Sdim/// Once split, another string can be inserted in between the two halves by 29274958Sdim/// inserting a RopePiece in between the two others. All of this is very 30274958Sdim/// inexpensive: it takes time proportional to the number of RopePieces, not the 31274958Sdim/// length of the strings they represent. 32274958Sdim/// 33274958Sdim/// While a linear sequences of RopePieces is the conceptual model, the actual 34274958Sdim/// implementation captures them in an adapted B+ Tree. Using a B+ tree (which 35274958Sdim/// is a tree that keeps the values in the leaves and has where each node 36274958Sdim/// contains a reasonable number of pointers to children/values) allows us to 37274958Sdim/// maintain efficient operation when the RewriteRope contains a *huge* number 38274958Sdim/// of RopePieces. The basic idea of the B+ Tree is that it allows us to find 39274958Sdim/// the RopePiece corresponding to some offset very efficiently, and it 40274958Sdim/// automatically balances itself on insertions of RopePieces (which can happen 41274958Sdim/// for both insertions and erases of string ranges). 42274958Sdim/// 43274958Sdim/// The one wrinkle on the theory is that we don't attempt to keep the tree 44274958Sdim/// properly balanced when erases happen. Erases of string data can both insert 45274958Sdim/// new RopePieces (e.g. when the middle of some other rope piece is deleted, 46274958Sdim/// which results in two rope pieces, which is just like an insert) or it can 47274958Sdim/// reduce the number of RopePieces maintained by the B+Tree. In the case when 48274958Sdim/// the number of RopePieces is reduced, we don't attempt to maintain the 49274958Sdim/// standard 'invariant' that each node in the tree contains at least 50274958Sdim/// 'WidthFactor' children/values. For our use cases, this doesn't seem to 51274958Sdim/// matter. 52274958Sdim/// 53274958Sdim/// The implementation below is primarily implemented in terms of three classes: 54274958Sdim/// RopePieceBTreeNode - Common base class for: 55274958Sdim/// 56274958Sdim/// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece 57274958Sdim/// nodes. This directly represents a chunk of the string with those 58274958Sdim/// RopePieces contatenated. 59274958Sdim/// RopePieceBTreeInterior - An interior node in the B+ Tree, which manages 60274958Sdim/// up to '2*WidthFactor' other nodes in the tree. 61274958Sdim 62274958Sdim 63274958Sdim//===----------------------------------------------------------------------===// 64274958Sdim// RopePieceBTreeNode Class 65274958Sdim//===----------------------------------------------------------------------===// 66274958Sdim 67274958Sdimnamespace { 68274958Sdim /// RopePieceBTreeNode - Common base class of RopePieceBTreeLeaf and 69274958Sdim /// RopePieceBTreeInterior. This provides some 'virtual' dispatching methods 70274958Sdim /// and a flag that determines which subclass the instance is. Also 71274958Sdim /// important, this node knows the full extend of the node, including any 72274958Sdim /// children that it has. This allows efficient skipping over entire subtrees 73274958Sdim /// when looking for an offset in the BTree. 74274958Sdim class RopePieceBTreeNode { 75274958Sdim protected: 76274958Sdim /// WidthFactor - This controls the number of K/V slots held in the BTree: 77274958Sdim /// how wide it is. Each level of the BTree is guaranteed to have at least 78274958Sdim /// 'WidthFactor' elements in it (either ropepieces or children), (except 79274958Sdim /// the root, which may have less) and may have at most 2*WidthFactor 80274958Sdim /// elements. 81274958Sdim enum { WidthFactor = 8 }; 82274958Sdim 83274958Sdim /// Size - This is the number of bytes of file this node (including any 84274958Sdim /// potential children) covers. 85274958Sdim unsigned Size; 86274958Sdim 87274958Sdim /// IsLeaf - True if this is an instance of RopePieceBTreeLeaf, false if it 88274958Sdim /// is an instance of RopePieceBTreeInterior. 89274958Sdim bool IsLeaf; 90274958Sdim 91274958Sdim RopePieceBTreeNode(bool isLeaf) : Size(0), IsLeaf(isLeaf) {} 92288943Sdim ~RopePieceBTreeNode() = default; 93288943Sdim 94274958Sdim public: 95274958Sdim bool isLeaf() const { return IsLeaf; } 96274958Sdim unsigned size() const { return Size; } 97274958Sdim 98274958Sdim void Destroy(); 99274958Sdim 100274958Sdim /// split - Split the range containing the specified offset so that we are 101274958Sdim /// guaranteed that there is a place to do an insertion at the specified 102274958Sdim /// offset. The offset is relative, so "0" is the start of the node. 103274958Sdim /// 104274958Sdim /// If there is no space in this subtree for the extra piece, the extra tree 105274958Sdim /// node is returned and must be inserted into a parent. 106274958Sdim RopePieceBTreeNode *split(unsigned Offset); 107274958Sdim 108274958Sdim /// insert - Insert the specified ropepiece into this tree node at the 109274958Sdim /// specified offset. The offset is relative, so "0" is the start of the 110274958Sdim /// node. 111274958Sdim /// 112274958Sdim /// If there is no space in this subtree for the extra piece, the extra tree 113274958Sdim /// node is returned and must be inserted into a parent. 114274958Sdim RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 115274958Sdim 116274958Sdim /// erase - Remove NumBytes from this node at the specified offset. We are 117274958Sdim /// guaranteed that there is a split at Offset. 118274958Sdim void erase(unsigned Offset, unsigned NumBytes); 119274958Sdim 120274958Sdim }; 121274958Sdim} // end anonymous namespace 122274958Sdim 123274958Sdim//===----------------------------------------------------------------------===// 124274958Sdim// RopePieceBTreeLeaf Class 125274958Sdim//===----------------------------------------------------------------------===// 126274958Sdim 127274958Sdimnamespace { 128274958Sdim /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece 129274958Sdim /// nodes. This directly represents a chunk of the string with those 130274958Sdim /// RopePieces contatenated. Since this is a B+Tree, all values (in this case 131274958Sdim /// instances of RopePiece) are stored in leaves like this. To make iteration 132274958Sdim /// over the leaves efficient, they maintain a singly linked list through the 133274958Sdim /// NextLeaf field. This allows the B+Tree forward iterator to be constant 134274958Sdim /// time for all increments. 135274958Sdim class RopePieceBTreeLeaf : public RopePieceBTreeNode { 136274958Sdim /// NumPieces - This holds the number of rope pieces currently active in the 137274958Sdim /// Pieces array. 138274958Sdim unsigned char NumPieces; 139274958Sdim 140274958Sdim /// Pieces - This tracks the file chunks currently in this leaf. 141274958Sdim /// 142274958Sdim RopePiece Pieces[2*WidthFactor]; 143274958Sdim 144274958Sdim /// NextLeaf - This is a pointer to the next leaf in the tree, allowing 145274958Sdim /// efficient in-order forward iteration of the tree without traversal. 146274958Sdim RopePieceBTreeLeaf **PrevLeaf, *NextLeaf; 147274958Sdim public: 148274958Sdim RopePieceBTreeLeaf() : RopePieceBTreeNode(true), NumPieces(0), 149274958Sdim PrevLeaf(nullptr), NextLeaf(nullptr) {} 150274958Sdim ~RopePieceBTreeLeaf() { 151274958Sdim if (PrevLeaf || NextLeaf) 152274958Sdim removeFromLeafInOrder(); 153274958Sdim clear(); 154274958Sdim } 155274958Sdim 156274958Sdim bool isFull() const { return NumPieces == 2*WidthFactor; } 157274958Sdim 158274958Sdim /// clear - Remove all rope pieces from this leaf. 159274958Sdim void clear() { 160274958Sdim while (NumPieces) 161274958Sdim Pieces[--NumPieces] = RopePiece(); 162274958Sdim Size = 0; 163274958Sdim } 164274958Sdim 165274958Sdim unsigned getNumPieces() const { return NumPieces; } 166274958Sdim 167274958Sdim const RopePiece &getPiece(unsigned i) const { 168274958Sdim assert(i < getNumPieces() && "Invalid piece ID"); 169274958Sdim return Pieces[i]; 170274958Sdim } 171274958Sdim 172274958Sdim const RopePieceBTreeLeaf *getNextLeafInOrder() const { return NextLeaf; } 173274958Sdim void insertAfterLeafInOrder(RopePieceBTreeLeaf *Node) { 174274958Sdim assert(!PrevLeaf && !NextLeaf && "Already in ordering"); 175274958Sdim 176274958Sdim NextLeaf = Node->NextLeaf; 177274958Sdim if (NextLeaf) 178274958Sdim NextLeaf->PrevLeaf = &NextLeaf; 179274958Sdim PrevLeaf = &Node->NextLeaf; 180274958Sdim Node->NextLeaf = this; 181274958Sdim } 182274958Sdim 183274958Sdim void removeFromLeafInOrder() { 184274958Sdim if (PrevLeaf) { 185274958Sdim *PrevLeaf = NextLeaf; 186274958Sdim if (NextLeaf) 187274958Sdim NextLeaf->PrevLeaf = PrevLeaf; 188274958Sdim } else if (NextLeaf) { 189274958Sdim NextLeaf->PrevLeaf = nullptr; 190274958Sdim } 191274958Sdim } 192274958Sdim 193274958Sdim /// FullRecomputeSizeLocally - This method recomputes the 'Size' field by 194274958Sdim /// summing the size of all RopePieces. 195274958Sdim void FullRecomputeSizeLocally() { 196274958Sdim Size = 0; 197274958Sdim for (unsigned i = 0, e = getNumPieces(); i != e; ++i) 198274958Sdim Size += getPiece(i).size(); 199274958Sdim } 200274958Sdim 201274958Sdim /// split - Split the range containing the specified offset so that we are 202274958Sdim /// guaranteed that there is a place to do an insertion at the specified 203274958Sdim /// offset. The offset is relative, so "0" is the start of the node. 204274958Sdim /// 205274958Sdim /// If there is no space in this subtree for the extra piece, the extra tree 206274958Sdim /// node is returned and must be inserted into a parent. 207274958Sdim RopePieceBTreeNode *split(unsigned Offset); 208274958Sdim 209274958Sdim /// insert - Insert the specified ropepiece into this tree node at the 210274958Sdim /// specified offset. The offset is relative, so "0" is the start of the 211274958Sdim /// node. 212274958Sdim /// 213274958Sdim /// If there is no space in this subtree for the extra piece, the extra tree 214274958Sdim /// node is returned and must be inserted into a parent. 215274958Sdim RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 216274958Sdim 217274958Sdim 218274958Sdim /// erase - Remove NumBytes from this node at the specified offset. We are 219274958Sdim /// guaranteed that there is a split at Offset. 220274958Sdim void erase(unsigned Offset, unsigned NumBytes); 221274958Sdim 222274958Sdim static inline bool classof(const RopePieceBTreeNode *N) { 223274958Sdim return N->isLeaf(); 224274958Sdim } 225274958Sdim }; 226274958Sdim} // end anonymous namespace 227274958Sdim 228274958Sdim/// split - Split the range containing the specified offset so that we are 229274958Sdim/// guaranteed that there is a place to do an insertion at the specified 230274958Sdim/// offset. The offset is relative, so "0" is the start of the node. 231274958Sdim/// 232274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree 233274958Sdim/// node is returned and must be inserted into a parent. 234274958SdimRopePieceBTreeNode *RopePieceBTreeLeaf::split(unsigned Offset) { 235274958Sdim // Find the insertion point. We are guaranteed that there is a split at the 236274958Sdim // specified offset so find it. 237274958Sdim if (Offset == 0 || Offset == size()) { 238274958Sdim // Fastpath for a common case. There is already a splitpoint at the end. 239274958Sdim return nullptr; 240274958Sdim } 241274958Sdim 242274958Sdim // Find the piece that this offset lands in. 243274958Sdim unsigned PieceOffs = 0; 244274958Sdim unsigned i = 0; 245274958Sdim while (Offset >= PieceOffs+Pieces[i].size()) { 246274958Sdim PieceOffs += Pieces[i].size(); 247274958Sdim ++i; 248274958Sdim } 249274958Sdim 250274958Sdim // If there is already a split point at the specified offset, just return 251274958Sdim // success. 252274958Sdim if (PieceOffs == Offset) 253274958Sdim return nullptr; 254274958Sdim 255274958Sdim // Otherwise, we need to split piece 'i' at Offset-PieceOffs. Convert Offset 256274958Sdim // to being Piece relative. 257274958Sdim unsigned IntraPieceOffset = Offset-PieceOffs; 258274958Sdim 259274958Sdim // We do this by shrinking the RopePiece and then doing an insert of the tail. 260274958Sdim RopePiece Tail(Pieces[i].StrData, Pieces[i].StartOffs+IntraPieceOffset, 261274958Sdim Pieces[i].EndOffs); 262274958Sdim Size -= Pieces[i].size(); 263274958Sdim Pieces[i].EndOffs = Pieces[i].StartOffs+IntraPieceOffset; 264274958Sdim Size += Pieces[i].size(); 265274958Sdim 266274958Sdim return insert(Offset, Tail); 267274958Sdim} 268274958Sdim 269274958Sdim 270274958Sdim/// insert - Insert the specified RopePiece into this tree node at the 271274958Sdim/// specified offset. The offset is relative, so "0" is the start of the node. 272274958Sdim/// 273274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree 274274958Sdim/// node is returned and must be inserted into a parent. 275274958SdimRopePieceBTreeNode *RopePieceBTreeLeaf::insert(unsigned Offset, 276274958Sdim const RopePiece &R) { 277274958Sdim // If this node is not full, insert the piece. 278274958Sdim if (!isFull()) { 279274958Sdim // Find the insertion point. We are guaranteed that there is a split at the 280274958Sdim // specified offset so find it. 281274958Sdim unsigned i = 0, e = getNumPieces(); 282274958Sdim if (Offset == size()) { 283274958Sdim // Fastpath for a common case. 284274958Sdim i = e; 285274958Sdim } else { 286274958Sdim unsigned SlotOffs = 0; 287274958Sdim for (; Offset > SlotOffs; ++i) 288274958Sdim SlotOffs += getPiece(i).size(); 289274958Sdim assert(SlotOffs == Offset && "Split didn't occur before insertion!"); 290274958Sdim } 291274958Sdim 292274958Sdim // For an insertion into a non-full leaf node, just insert the value in 293274958Sdim // its sorted position. This requires moving later values over. 294274958Sdim for (; i != e; --e) 295274958Sdim Pieces[e] = Pieces[e-1]; 296274958Sdim Pieces[i] = R; 297274958Sdim ++NumPieces; 298274958Sdim Size += R.size(); 299274958Sdim return nullptr; 300274958Sdim } 301274958Sdim 302274958Sdim // Otherwise, if this is leaf is full, split it in two halves. Since this 303274958Sdim // node is full, it contains 2*WidthFactor values. We move the first 304274958Sdim // 'WidthFactor' values to the LHS child (which we leave in this node) and 305274958Sdim // move the last 'WidthFactor' values into the RHS child. 306274958Sdim 307274958Sdim // Create the new node. 308274958Sdim RopePieceBTreeLeaf *NewNode = new RopePieceBTreeLeaf(); 309274958Sdim 310274958Sdim // Move over the last 'WidthFactor' values from here to NewNode. 311274958Sdim std::copy(&Pieces[WidthFactor], &Pieces[2*WidthFactor], 312274958Sdim &NewNode->Pieces[0]); 313274958Sdim // Replace old pieces with null RopePieces to drop refcounts. 314274958Sdim std::fill(&Pieces[WidthFactor], &Pieces[2*WidthFactor], RopePiece()); 315274958Sdim 316274958Sdim // Decrease the number of values in the two nodes. 317274958Sdim NewNode->NumPieces = NumPieces = WidthFactor; 318274958Sdim 319274958Sdim // Recompute the two nodes' size. 320274958Sdim NewNode->FullRecomputeSizeLocally(); 321274958Sdim FullRecomputeSizeLocally(); 322274958Sdim 323274958Sdim // Update the list of leaves. 324274958Sdim NewNode->insertAfterLeafInOrder(this); 325274958Sdim 326274958Sdim // These insertions can't fail. 327274958Sdim if (this->size() >= Offset) 328274958Sdim this->insert(Offset, R); 329274958Sdim else 330274958Sdim NewNode->insert(Offset - this->size(), R); 331274958Sdim return NewNode; 332274958Sdim} 333274958Sdim 334274958Sdim/// erase - Remove NumBytes from this node at the specified offset. We are 335274958Sdim/// guaranteed that there is a split at Offset. 336274958Sdimvoid RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) { 337274958Sdim // Since we are guaranteed that there is a split at Offset, we start by 338274958Sdim // finding the Piece that starts there. 339274958Sdim unsigned PieceOffs = 0; 340274958Sdim unsigned i = 0; 341274958Sdim for (; Offset > PieceOffs; ++i) 342274958Sdim PieceOffs += getPiece(i).size(); 343274958Sdim assert(PieceOffs == Offset && "Split didn't occur before erase!"); 344274958Sdim 345274958Sdim unsigned StartPiece = i; 346274958Sdim 347274958Sdim // Figure out how many pieces completely cover 'NumBytes'. We want to remove 348274958Sdim // all of them. 349274958Sdim for (; Offset+NumBytes > PieceOffs+getPiece(i).size(); ++i) 350274958Sdim PieceOffs += getPiece(i).size(); 351274958Sdim 352274958Sdim // If we exactly include the last one, include it in the region to delete. 353274958Sdim if (Offset+NumBytes == PieceOffs+getPiece(i).size()) 354274958Sdim PieceOffs += getPiece(i).size(), ++i; 355274958Sdim 356274958Sdim // If we completely cover some RopePieces, erase them now. 357274958Sdim if (i != StartPiece) { 358274958Sdim unsigned NumDeleted = i-StartPiece; 359274958Sdim for (; i != getNumPieces(); ++i) 360274958Sdim Pieces[i-NumDeleted] = Pieces[i]; 361274958Sdim 362274958Sdim // Drop references to dead rope pieces. 363274958Sdim std::fill(&Pieces[getNumPieces()-NumDeleted], &Pieces[getNumPieces()], 364274958Sdim RopePiece()); 365274958Sdim NumPieces -= NumDeleted; 366274958Sdim 367274958Sdim unsigned CoverBytes = PieceOffs-Offset; 368274958Sdim NumBytes -= CoverBytes; 369274958Sdim Size -= CoverBytes; 370274958Sdim } 371274958Sdim 372274958Sdim // If we completely removed some stuff, we could be done. 373274958Sdim if (NumBytes == 0) return; 374274958Sdim 375274958Sdim // Okay, now might be erasing part of some Piece. If this is the case, then 376274958Sdim // move the start point of the piece. 377274958Sdim assert(getPiece(StartPiece).size() > NumBytes); 378274958Sdim Pieces[StartPiece].StartOffs += NumBytes; 379274958Sdim 380274958Sdim // The size of this node just shrunk by NumBytes. 381274958Sdim Size -= NumBytes; 382274958Sdim} 383274958Sdim 384274958Sdim//===----------------------------------------------------------------------===// 385274958Sdim// RopePieceBTreeInterior Class 386274958Sdim//===----------------------------------------------------------------------===// 387274958Sdim 388274958Sdimnamespace { 389274958Sdim /// RopePieceBTreeInterior - This represents an interior node in the B+Tree, 390274958Sdim /// which holds up to 2*WidthFactor pointers to child nodes. 391274958Sdim class RopePieceBTreeInterior : public RopePieceBTreeNode { 392274958Sdim /// NumChildren - This holds the number of children currently active in the 393274958Sdim /// Children array. 394274958Sdim unsigned char NumChildren; 395274958Sdim RopePieceBTreeNode *Children[2*WidthFactor]; 396274958Sdim public: 397274958Sdim RopePieceBTreeInterior() : RopePieceBTreeNode(false), NumChildren(0) {} 398274958Sdim 399274958Sdim RopePieceBTreeInterior(RopePieceBTreeNode *LHS, RopePieceBTreeNode *RHS) 400274958Sdim : RopePieceBTreeNode(false) { 401274958Sdim Children[0] = LHS; 402274958Sdim Children[1] = RHS; 403274958Sdim NumChildren = 2; 404274958Sdim Size = LHS->size() + RHS->size(); 405274958Sdim } 406274958Sdim 407274958Sdim ~RopePieceBTreeInterior() { 408274958Sdim for (unsigned i = 0, e = getNumChildren(); i != e; ++i) 409274958Sdim Children[i]->Destroy(); 410274958Sdim } 411274958Sdim 412274958Sdim bool isFull() const { return NumChildren == 2*WidthFactor; } 413274958Sdim 414274958Sdim unsigned getNumChildren() const { return NumChildren; } 415274958Sdim const RopePieceBTreeNode *getChild(unsigned i) const { 416274958Sdim assert(i < NumChildren && "invalid child #"); 417274958Sdim return Children[i]; 418274958Sdim } 419274958Sdim RopePieceBTreeNode *getChild(unsigned i) { 420274958Sdim assert(i < NumChildren && "invalid child #"); 421274958Sdim return Children[i]; 422274958Sdim } 423274958Sdim 424274958Sdim /// FullRecomputeSizeLocally - Recompute the Size field of this node by 425274958Sdim /// summing up the sizes of the child nodes. 426274958Sdim void FullRecomputeSizeLocally() { 427274958Sdim Size = 0; 428274958Sdim for (unsigned i = 0, e = getNumChildren(); i != e; ++i) 429274958Sdim Size += getChild(i)->size(); 430274958Sdim } 431274958Sdim 432274958Sdim 433274958Sdim /// split - Split the range containing the specified offset so that we are 434274958Sdim /// guaranteed that there is a place to do an insertion at the specified 435274958Sdim /// offset. The offset is relative, so "0" is the start of the node. 436274958Sdim /// 437274958Sdim /// If there is no space in this subtree for the extra piece, the extra tree 438274958Sdim /// node is returned and must be inserted into a parent. 439274958Sdim RopePieceBTreeNode *split(unsigned Offset); 440274958Sdim 441274958Sdim 442274958Sdim /// insert - Insert the specified ropepiece into this tree node at the 443274958Sdim /// specified offset. The offset is relative, so "0" is the start of the 444274958Sdim /// node. 445274958Sdim /// 446274958Sdim /// If there is no space in this subtree for the extra piece, the extra tree 447274958Sdim /// node is returned and must be inserted into a parent. 448274958Sdim RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 449274958Sdim 450274958Sdim /// HandleChildPiece - A child propagated an insertion result up to us. 451274958Sdim /// Insert the new child, and/or propagate the result further up the tree. 452274958Sdim RopePieceBTreeNode *HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS); 453274958Sdim 454274958Sdim /// erase - Remove NumBytes from this node at the specified offset. We are 455274958Sdim /// guaranteed that there is a split at Offset. 456274958Sdim void erase(unsigned Offset, unsigned NumBytes); 457274958Sdim 458274958Sdim static inline bool classof(const RopePieceBTreeNode *N) { 459274958Sdim return !N->isLeaf(); 460274958Sdim } 461274958Sdim }; 462274958Sdim} // end anonymous namespace 463274958Sdim 464274958Sdim/// split - Split the range containing the specified offset so that we are 465274958Sdim/// guaranteed that there is a place to do an insertion at the specified 466274958Sdim/// offset. The offset is relative, so "0" is the start of the node. 467274958Sdim/// 468274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree 469274958Sdim/// node is returned and must be inserted into a parent. 470274958SdimRopePieceBTreeNode *RopePieceBTreeInterior::split(unsigned Offset) { 471274958Sdim // Figure out which child to split. 472274958Sdim if (Offset == 0 || Offset == size()) 473274958Sdim return nullptr; // If we have an exact offset, we're already split. 474274958Sdim 475274958Sdim unsigned ChildOffset = 0; 476274958Sdim unsigned i = 0; 477274958Sdim for (; Offset >= ChildOffset+getChild(i)->size(); ++i) 478274958Sdim ChildOffset += getChild(i)->size(); 479274958Sdim 480274958Sdim // If already split there, we're done. 481274958Sdim if (ChildOffset == Offset) 482274958Sdim return nullptr; 483274958Sdim 484274958Sdim // Otherwise, recursively split the child. 485274958Sdim if (RopePieceBTreeNode *RHS = getChild(i)->split(Offset-ChildOffset)) 486274958Sdim return HandleChildPiece(i, RHS); 487274958Sdim return nullptr; // Done! 488274958Sdim} 489274958Sdim 490274958Sdim/// insert - Insert the specified ropepiece into this tree node at the 491274958Sdim/// specified offset. The offset is relative, so "0" is the start of the 492274958Sdim/// node. 493274958Sdim/// 494274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree 495274958Sdim/// node is returned and must be inserted into a parent. 496274958SdimRopePieceBTreeNode *RopePieceBTreeInterior::insert(unsigned Offset, 497274958Sdim const RopePiece &R) { 498274958Sdim // Find the insertion point. We are guaranteed that there is a split at the 499274958Sdim // specified offset so find it. 500274958Sdim unsigned i = 0, e = getNumChildren(); 501274958Sdim 502274958Sdim unsigned ChildOffs = 0; 503274958Sdim if (Offset == size()) { 504274958Sdim // Fastpath for a common case. Insert at end of last child. 505274958Sdim i = e-1; 506274958Sdim ChildOffs = size()-getChild(i)->size(); 507274958Sdim } else { 508274958Sdim for (; Offset > ChildOffs+getChild(i)->size(); ++i) 509274958Sdim ChildOffs += getChild(i)->size(); 510274958Sdim } 511274958Sdim 512274958Sdim Size += R.size(); 513274958Sdim 514274958Sdim // Insert at the end of this child. 515274958Sdim if (RopePieceBTreeNode *RHS = getChild(i)->insert(Offset-ChildOffs, R)) 516274958Sdim return HandleChildPiece(i, RHS); 517274958Sdim 518274958Sdim return nullptr; 519274958Sdim} 520274958Sdim 521274958Sdim/// HandleChildPiece - A child propagated an insertion result up to us. 522274958Sdim/// Insert the new child, and/or propagate the result further up the tree. 523274958SdimRopePieceBTreeNode * 524274958SdimRopePieceBTreeInterior::HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS) { 525274958Sdim // Otherwise the child propagated a subtree up to us as a new child. See if 526274958Sdim // we have space for it here. 527274958Sdim if (!isFull()) { 528274958Sdim // Insert RHS after child 'i'. 529274958Sdim if (i + 1 != getNumChildren()) 530274958Sdim memmove(&Children[i+2], &Children[i+1], 531274958Sdim (getNumChildren()-i-1)*sizeof(Children[0])); 532274958Sdim Children[i+1] = RHS; 533274958Sdim ++NumChildren; 534274958Sdim return nullptr; 535274958Sdim } 536274958Sdim 537274958Sdim // Okay, this node is full. Split it in half, moving WidthFactor children to 538274958Sdim // a newly allocated interior node. 539274958Sdim 540274958Sdim // Create the new node. 541274958Sdim RopePieceBTreeInterior *NewNode = new RopePieceBTreeInterior(); 542274958Sdim 543274958Sdim // Move over the last 'WidthFactor' values from here to NewNode. 544274958Sdim memcpy(&NewNode->Children[0], &Children[WidthFactor], 545274958Sdim WidthFactor*sizeof(Children[0])); 546274958Sdim 547274958Sdim // Decrease the number of values in the two nodes. 548274958Sdim NewNode->NumChildren = NumChildren = WidthFactor; 549274958Sdim 550274958Sdim // Finally, insert the two new children in the side the can (now) hold them. 551274958Sdim // These insertions can't fail. 552274958Sdim if (i < WidthFactor) 553274958Sdim this->HandleChildPiece(i, RHS); 554274958Sdim else 555274958Sdim NewNode->HandleChildPiece(i-WidthFactor, RHS); 556274958Sdim 557274958Sdim // Recompute the two nodes' size. 558274958Sdim NewNode->FullRecomputeSizeLocally(); 559274958Sdim FullRecomputeSizeLocally(); 560274958Sdim return NewNode; 561274958Sdim} 562274958Sdim 563274958Sdim/// erase - Remove NumBytes from this node at the specified offset. We are 564274958Sdim/// guaranteed that there is a split at Offset. 565274958Sdimvoid RopePieceBTreeInterior::erase(unsigned Offset, unsigned NumBytes) { 566274958Sdim // This will shrink this node by NumBytes. 567274958Sdim Size -= NumBytes; 568274958Sdim 569274958Sdim // Find the first child that overlaps with Offset. 570274958Sdim unsigned i = 0; 571274958Sdim for (; Offset >= getChild(i)->size(); ++i) 572274958Sdim Offset -= getChild(i)->size(); 573274958Sdim 574274958Sdim // Propagate the delete request into overlapping children, or completely 575274958Sdim // delete the children as appropriate. 576274958Sdim while (NumBytes) { 577274958Sdim RopePieceBTreeNode *CurChild = getChild(i); 578274958Sdim 579274958Sdim // If we are deleting something contained entirely in the child, pass on the 580274958Sdim // request. 581274958Sdim if (Offset+NumBytes < CurChild->size()) { 582274958Sdim CurChild->erase(Offset, NumBytes); 583274958Sdim return; 584274958Sdim } 585274958Sdim 586274958Sdim // If this deletion request starts somewhere in the middle of the child, it 587274958Sdim // must be deleting to the end of the child. 588274958Sdim if (Offset) { 589274958Sdim unsigned BytesFromChild = CurChild->size()-Offset; 590274958Sdim CurChild->erase(Offset, BytesFromChild); 591274958Sdim NumBytes -= BytesFromChild; 592274958Sdim // Start at the beginning of the next child. 593274958Sdim Offset = 0; 594274958Sdim ++i; 595274958Sdim continue; 596274958Sdim } 597274958Sdim 598274958Sdim // If the deletion request completely covers the child, delete it and move 599274958Sdim // the rest down. 600274958Sdim NumBytes -= CurChild->size(); 601274958Sdim CurChild->Destroy(); 602274958Sdim --NumChildren; 603274958Sdim if (i != getNumChildren()) 604274958Sdim memmove(&Children[i], &Children[i+1], 605274958Sdim (getNumChildren()-i)*sizeof(Children[0])); 606274958Sdim } 607274958Sdim} 608274958Sdim 609274958Sdim//===----------------------------------------------------------------------===// 610274958Sdim// RopePieceBTreeNode Implementation 611274958Sdim//===----------------------------------------------------------------------===// 612274958Sdim 613274958Sdimvoid RopePieceBTreeNode::Destroy() { 614274958Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 615274958Sdim delete Leaf; 616274958Sdim else 617274958Sdim delete cast<RopePieceBTreeInterior>(this); 618274958Sdim} 619274958Sdim 620274958Sdim/// split - Split the range containing the specified offset so that we are 621274958Sdim/// guaranteed that there is a place to do an insertion at the specified 622274958Sdim/// offset. The offset is relative, so "0" is the start of the node. 623274958Sdim/// 624274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree 625274958Sdim/// node is returned and must be inserted into a parent. 626274958SdimRopePieceBTreeNode *RopePieceBTreeNode::split(unsigned Offset) { 627274958Sdim assert(Offset <= size() && "Invalid offset to split!"); 628274958Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 629274958Sdim return Leaf->split(Offset); 630274958Sdim return cast<RopePieceBTreeInterior>(this)->split(Offset); 631274958Sdim} 632274958Sdim 633274958Sdim/// insert - Insert the specified ropepiece into this tree node at the 634274958Sdim/// specified offset. The offset is relative, so "0" is the start of the 635274958Sdim/// node. 636274958Sdim/// 637274958Sdim/// If there is no space in this subtree for the extra piece, the extra tree 638274958Sdim/// node is returned and must be inserted into a parent. 639274958SdimRopePieceBTreeNode *RopePieceBTreeNode::insert(unsigned Offset, 640274958Sdim const RopePiece &R) { 641274958Sdim assert(Offset <= size() && "Invalid offset to insert!"); 642274958Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 643274958Sdim return Leaf->insert(Offset, R); 644274958Sdim return cast<RopePieceBTreeInterior>(this)->insert(Offset, R); 645274958Sdim} 646274958Sdim 647274958Sdim/// erase - Remove NumBytes from this node at the specified offset. We are 648274958Sdim/// guaranteed that there is a split at Offset. 649274958Sdimvoid RopePieceBTreeNode::erase(unsigned Offset, unsigned NumBytes) { 650274958Sdim assert(Offset+NumBytes <= size() && "Invalid offset to erase!"); 651274958Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 652274958Sdim return Leaf->erase(Offset, NumBytes); 653274958Sdim return cast<RopePieceBTreeInterior>(this)->erase(Offset, NumBytes); 654274958Sdim} 655274958Sdim 656274958Sdim 657274958Sdim//===----------------------------------------------------------------------===// 658274958Sdim// RopePieceBTreeIterator Implementation 659274958Sdim//===----------------------------------------------------------------------===// 660274958Sdim 661274958Sdimstatic const RopePieceBTreeLeaf *getCN(const void *P) { 662274958Sdim return static_cast<const RopePieceBTreeLeaf*>(P); 663274958Sdim} 664274958Sdim 665274958Sdim// begin iterator. 666274958SdimRopePieceBTreeIterator::RopePieceBTreeIterator(const void *n) { 667274958Sdim const RopePieceBTreeNode *N = static_cast<const RopePieceBTreeNode*>(n); 668274958Sdim 669274958Sdim // Walk down the left side of the tree until we get to a leaf. 670274958Sdim while (const RopePieceBTreeInterior *IN = dyn_cast<RopePieceBTreeInterior>(N)) 671274958Sdim N = IN->getChild(0); 672274958Sdim 673274958Sdim // We must have at least one leaf. 674274958Sdim CurNode = cast<RopePieceBTreeLeaf>(N); 675274958Sdim 676274958Sdim // If we found a leaf that happens to be empty, skip over it until we get 677274958Sdim // to something full. 678274958Sdim while (CurNode && getCN(CurNode)->getNumPieces() == 0) 679274958Sdim CurNode = getCN(CurNode)->getNextLeafInOrder(); 680274958Sdim 681274958Sdim if (CurNode) 682274958Sdim CurPiece = &getCN(CurNode)->getPiece(0); 683274958Sdim else // Empty tree, this is an end() iterator. 684274958Sdim CurPiece = nullptr; 685274958Sdim CurChar = 0; 686274958Sdim} 687274958Sdim 688274958Sdimvoid RopePieceBTreeIterator::MoveToNextPiece() { 689274958Sdim if (CurPiece != &getCN(CurNode)->getPiece(getCN(CurNode)->getNumPieces()-1)) { 690274958Sdim CurChar = 0; 691274958Sdim ++CurPiece; 692274958Sdim return; 693274958Sdim } 694274958Sdim 695274958Sdim // Find the next non-empty leaf node. 696274958Sdim do 697274958Sdim CurNode = getCN(CurNode)->getNextLeafInOrder(); 698274958Sdim while (CurNode && getCN(CurNode)->getNumPieces() == 0); 699274958Sdim 700274958Sdim if (CurNode) 701274958Sdim CurPiece = &getCN(CurNode)->getPiece(0); 702274958Sdim else // Hit end(). 703274958Sdim CurPiece = nullptr; 704274958Sdim CurChar = 0; 705274958Sdim} 706274958Sdim 707274958Sdim//===----------------------------------------------------------------------===// 708274958Sdim// RopePieceBTree Implementation 709274958Sdim//===----------------------------------------------------------------------===// 710274958Sdim 711274958Sdimstatic RopePieceBTreeNode *getRoot(void *P) { 712274958Sdim return static_cast<RopePieceBTreeNode*>(P); 713274958Sdim} 714274958Sdim 715274958SdimRopePieceBTree::RopePieceBTree() { 716274958Sdim Root = new RopePieceBTreeLeaf(); 717274958Sdim} 718274958SdimRopePieceBTree::RopePieceBTree(const RopePieceBTree &RHS) { 719274958Sdim assert(RHS.empty() && "Can't copy non-empty tree yet"); 720274958Sdim Root = new RopePieceBTreeLeaf(); 721274958Sdim} 722274958SdimRopePieceBTree::~RopePieceBTree() { 723274958Sdim getRoot(Root)->Destroy(); 724274958Sdim} 725274958Sdim 726274958Sdimunsigned RopePieceBTree::size() const { 727274958Sdim return getRoot(Root)->size(); 728274958Sdim} 729274958Sdim 730274958Sdimvoid RopePieceBTree::clear() { 731274958Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(getRoot(Root))) 732274958Sdim Leaf->clear(); 733274958Sdim else { 734274958Sdim getRoot(Root)->Destroy(); 735274958Sdim Root = new RopePieceBTreeLeaf(); 736274958Sdim } 737274958Sdim} 738274958Sdim 739274958Sdimvoid RopePieceBTree::insert(unsigned Offset, const RopePiece &R) { 740274958Sdim // #1. Split at Offset. 741274958Sdim if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset)) 742274958Sdim Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 743274958Sdim 744274958Sdim // #2. Do the insertion. 745274958Sdim if (RopePieceBTreeNode *RHS = getRoot(Root)->insert(Offset, R)) 746274958Sdim Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 747274958Sdim} 748274958Sdim 749274958Sdimvoid RopePieceBTree::erase(unsigned Offset, unsigned NumBytes) { 750274958Sdim // #1. Split at Offset. 751274958Sdim if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset)) 752274958Sdim Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 753274958Sdim 754274958Sdim // #2. Do the erasing. 755274958Sdim getRoot(Root)->erase(Offset, NumBytes); 756274958Sdim} 757274958Sdim 758274958Sdim//===----------------------------------------------------------------------===// 759274958Sdim// RewriteRope Implementation 760274958Sdim//===----------------------------------------------------------------------===// 761274958Sdim 762274958Sdim/// MakeRopeString - This copies the specified byte range into some instance of 763274958Sdim/// RopeRefCountString, and return a RopePiece that represents it. This uses 764274958Sdim/// the AllocBuffer object to aggregate requests for small strings into one 765274958Sdim/// allocation instead of doing tons of tiny allocations. 766274958SdimRopePiece RewriteRope::MakeRopeString(const char *Start, const char *End) { 767274958Sdim unsigned Len = End-Start; 768274958Sdim assert(Len && "Zero length RopePiece is invalid!"); 769274958Sdim 770274958Sdim // If we have space for this string in the current alloc buffer, use it. 771274958Sdim if (AllocOffs+Len <= AllocChunkSize) { 772274958Sdim memcpy(AllocBuffer->Data+AllocOffs, Start, Len); 773274958Sdim AllocOffs += Len; 774274958Sdim return RopePiece(AllocBuffer, AllocOffs-Len, AllocOffs); 775274958Sdim } 776274958Sdim 777274958Sdim // If we don't have enough room because this specific allocation is huge, 778274958Sdim // just allocate a new rope piece for it alone. 779274958Sdim if (Len > AllocChunkSize) { 780274958Sdim unsigned Size = End-Start+sizeof(RopeRefCountString)-1; 781274958Sdim RopeRefCountString *Res = 782274958Sdim reinterpret_cast<RopeRefCountString *>(new char[Size]); 783274958Sdim Res->RefCount = 0; 784274958Sdim memcpy(Res->Data, Start, End-Start); 785274958Sdim return RopePiece(Res, 0, End-Start); 786274958Sdim } 787274958Sdim 788274958Sdim // Otherwise, this was a small request but we just don't have space for it 789274958Sdim // Make a new chunk and share it with later allocations. 790274958Sdim 791274958Sdim unsigned AllocSize = offsetof(RopeRefCountString, Data) + AllocChunkSize; 792280031Sdim RopeRefCountString *Res = 793280031Sdim reinterpret_cast<RopeRefCountString *>(new char[AllocSize]); 794280031Sdim Res->RefCount = 0; 795280031Sdim memcpy(Res->Data, Start, Len); 796280031Sdim AllocBuffer = Res; 797274958Sdim AllocOffs = Len; 798274958Sdim 799274958Sdim return RopePiece(AllocBuffer, 0, Len); 800274958Sdim} 801274958Sdim 802274958Sdim 803