1243791Sdim//===--- RewriteRope.cpp - Rope specialized for rewriter --------*- C++ -*-===// 2243791Sdim// 3243791Sdim// The LLVM Compiler Infrastructure 4243791Sdim// 5243791Sdim// This file is distributed under the University of Illinois Open Source 6243791Sdim// License. See LICENSE.TXT for details. 7243791Sdim// 8243791Sdim//===----------------------------------------------------------------------===// 9243791Sdim// 10243791Sdim// This file implements the RewriteRope class, which is a powerful string. 11243791Sdim// 12243791Sdim//===----------------------------------------------------------------------===// 13243791Sdim 14243791Sdim#include "clang/Rewrite/Core/RewriteRope.h" 15243791Sdim#include "clang/Basic/LLVM.h" 16243791Sdim#include <algorithm> 17243791Sdimusing namespace clang; 18243791Sdim 19243791Sdim/// RewriteRope is a "strong" string class, designed to make insertions and 20243791Sdim/// deletions in the middle of the string nearly constant time (really, they are 21243791Sdim/// O(log N), but with a very low constant factor). 22243791Sdim/// 23243791Sdim/// The implementation of this datastructure is a conceptual linear sequence of 24243791Sdim/// RopePiece elements. Each RopePiece represents a view on a separately 25243791Sdim/// allocated and reference counted string. This means that splitting a very 26243791Sdim/// long string can be done in constant time by splitting a RopePiece that 27243791Sdim/// references the whole string into two rope pieces that reference each half. 28243791Sdim/// Once split, another string can be inserted in between the two halves by 29243791Sdim/// inserting a RopePiece in between the two others. All of this is very 30243791Sdim/// inexpensive: it takes time proportional to the number of RopePieces, not the 31243791Sdim/// length of the strings they represent. 32243791Sdim/// 33243791Sdim/// While a linear sequences of RopePieces is the conceptual model, the actual 34243791Sdim/// implementation captures them in an adapted B+ Tree. Using a B+ tree (which 35243791Sdim/// is a tree that keeps the values in the leaves and has where each node 36243791Sdim/// contains a reasonable number of pointers to children/values) allows us to 37243791Sdim/// maintain efficient operation when the RewriteRope contains a *huge* number 38243791Sdim/// of RopePieces. The basic idea of the B+ Tree is that it allows us to find 39243791Sdim/// the RopePiece corresponding to some offset very efficiently, and it 40243791Sdim/// automatically balances itself on insertions of RopePieces (which can happen 41243791Sdim/// for both insertions and erases of string ranges). 42243791Sdim/// 43243791Sdim/// The one wrinkle on the theory is that we don't attempt to keep the tree 44243791Sdim/// properly balanced when erases happen. Erases of string data can both insert 45243791Sdim/// new RopePieces (e.g. when the middle of some other rope piece is deleted, 46243791Sdim/// which results in two rope pieces, which is just like an insert) or it can 47243791Sdim/// reduce the number of RopePieces maintained by the B+Tree. In the case when 48243791Sdim/// the number of RopePieces is reduced, we don't attempt to maintain the 49243791Sdim/// standard 'invariant' that each node in the tree contains at least 50243791Sdim/// 'WidthFactor' children/values. For our use cases, this doesn't seem to 51243791Sdim/// matter. 52243791Sdim/// 53243791Sdim/// The implementation below is primarily implemented in terms of three classes: 54243791Sdim/// RopePieceBTreeNode - Common base class for: 55243791Sdim/// 56243791Sdim/// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece 57243791Sdim/// nodes. This directly represents a chunk of the string with those 58243791Sdim/// RopePieces contatenated. 59243791Sdim/// RopePieceBTreeInterior - An interior node in the B+ Tree, which manages 60243791Sdim/// up to '2*WidthFactor' other nodes in the tree. 61243791Sdim 62243791Sdim 63243791Sdim//===----------------------------------------------------------------------===// 64243791Sdim// RopePieceBTreeNode Class 65243791Sdim//===----------------------------------------------------------------------===// 66243791Sdim 67243791Sdimnamespace { 68243791Sdim /// RopePieceBTreeNode - Common base class of RopePieceBTreeLeaf and 69243791Sdim /// RopePieceBTreeInterior. This provides some 'virtual' dispatching methods 70243791Sdim /// and a flag that determines which subclass the instance is. Also 71243791Sdim /// important, this node knows the full extend of the node, including any 72243791Sdim /// children that it has. This allows efficient skipping over entire subtrees 73243791Sdim /// when looking for an offset in the BTree. 74243791Sdim class RopePieceBTreeNode { 75243791Sdim protected: 76243791Sdim /// WidthFactor - This controls the number of K/V slots held in the BTree: 77243791Sdim /// how wide it is. Each level of the BTree is guaranteed to have at least 78243791Sdim /// 'WidthFactor' elements in it (either ropepieces or children), (except 79243791Sdim /// the root, which may have less) and may have at most 2*WidthFactor 80243791Sdim /// elements. 81243791Sdim enum { WidthFactor = 8 }; 82243791Sdim 83243791Sdim /// Size - This is the number of bytes of file this node (including any 84243791Sdim /// potential children) covers. 85243791Sdim unsigned Size; 86243791Sdim 87243791Sdim /// IsLeaf - True if this is an instance of RopePieceBTreeLeaf, false if it 88243791Sdim /// is an instance of RopePieceBTreeInterior. 89243791Sdim bool IsLeaf; 90243791Sdim 91243791Sdim RopePieceBTreeNode(bool isLeaf) : Size(0), IsLeaf(isLeaf) {} 92243791Sdim ~RopePieceBTreeNode() {} 93243791Sdim public: 94243791Sdim 95243791Sdim bool isLeaf() const { return IsLeaf; } 96243791Sdim unsigned size() const { return Size; } 97243791Sdim 98243791Sdim void Destroy(); 99243791Sdim 100243791Sdim /// split - Split the range containing the specified offset so that we are 101243791Sdim /// guaranteed that there is a place to do an insertion at the specified 102243791Sdim /// offset. The offset is relative, so "0" is the start of the node. 103243791Sdim /// 104243791Sdim /// If there is no space in this subtree for the extra piece, the extra tree 105243791Sdim /// node is returned and must be inserted into a parent. 106243791Sdim RopePieceBTreeNode *split(unsigned Offset); 107243791Sdim 108243791Sdim /// insert - Insert the specified ropepiece into this tree node at the 109243791Sdim /// specified offset. The offset is relative, so "0" is the start of the 110243791Sdim /// node. 111243791Sdim /// 112243791Sdim /// If there is no space in this subtree for the extra piece, the extra tree 113243791Sdim /// node is returned and must be inserted into a parent. 114243791Sdim RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 115243791Sdim 116243791Sdim /// erase - Remove NumBytes from this node at the specified offset. We are 117243791Sdim /// guaranteed that there is a split at Offset. 118243791Sdim void erase(unsigned Offset, unsigned NumBytes); 119243791Sdim 120243791Sdim }; 121243791Sdim} // end anonymous namespace 122243791Sdim 123243791Sdim//===----------------------------------------------------------------------===// 124243791Sdim// RopePieceBTreeLeaf Class 125243791Sdim//===----------------------------------------------------------------------===// 126243791Sdim 127243791Sdimnamespace { 128243791Sdim /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece 129243791Sdim /// nodes. This directly represents a chunk of the string with those 130243791Sdim /// RopePieces contatenated. Since this is a B+Tree, all values (in this case 131243791Sdim /// instances of RopePiece) are stored in leaves like this. To make iteration 132243791Sdim /// over the leaves efficient, they maintain a singly linked list through the 133243791Sdim /// NextLeaf field. This allows the B+Tree forward iterator to be constant 134243791Sdim /// time for all increments. 135243791Sdim class RopePieceBTreeLeaf : public RopePieceBTreeNode { 136243791Sdim /// NumPieces - This holds the number of rope pieces currently active in the 137243791Sdim /// Pieces array. 138243791Sdim unsigned char NumPieces; 139243791Sdim 140243791Sdim /// Pieces - This tracks the file chunks currently in this leaf. 141243791Sdim /// 142243791Sdim RopePiece Pieces[2*WidthFactor]; 143243791Sdim 144243791Sdim /// NextLeaf - This is a pointer to the next leaf in the tree, allowing 145243791Sdim /// efficient in-order forward iteration of the tree without traversal. 146243791Sdim RopePieceBTreeLeaf **PrevLeaf, *NextLeaf; 147243791Sdim public: 148243791Sdim RopePieceBTreeLeaf() : RopePieceBTreeNode(true), NumPieces(0), 149243791Sdim PrevLeaf(0), NextLeaf(0) {} 150243791Sdim ~RopePieceBTreeLeaf() { 151243791Sdim if (PrevLeaf || NextLeaf) 152243791Sdim removeFromLeafInOrder(); 153243791Sdim clear(); 154243791Sdim } 155243791Sdim 156243791Sdim bool isFull() const { return NumPieces == 2*WidthFactor; } 157243791Sdim 158243791Sdim /// clear - Remove all rope pieces from this leaf. 159243791Sdim void clear() { 160243791Sdim while (NumPieces) 161243791Sdim Pieces[--NumPieces] = RopePiece(); 162243791Sdim Size = 0; 163243791Sdim } 164243791Sdim 165243791Sdim unsigned getNumPieces() const { return NumPieces; } 166243791Sdim 167243791Sdim const RopePiece &getPiece(unsigned i) const { 168243791Sdim assert(i < getNumPieces() && "Invalid piece ID"); 169243791Sdim return Pieces[i]; 170243791Sdim } 171243791Sdim 172243791Sdim const RopePieceBTreeLeaf *getNextLeafInOrder() const { return NextLeaf; } 173243791Sdim void insertAfterLeafInOrder(RopePieceBTreeLeaf *Node) { 174243791Sdim assert(PrevLeaf == 0 && NextLeaf == 0 && "Already in ordering"); 175243791Sdim 176243791Sdim NextLeaf = Node->NextLeaf; 177243791Sdim if (NextLeaf) 178243791Sdim NextLeaf->PrevLeaf = &NextLeaf; 179243791Sdim PrevLeaf = &Node->NextLeaf; 180243791Sdim Node->NextLeaf = this; 181243791Sdim } 182243791Sdim 183243791Sdim void removeFromLeafInOrder() { 184243791Sdim if (PrevLeaf) { 185243791Sdim *PrevLeaf = NextLeaf; 186243791Sdim if (NextLeaf) 187243791Sdim NextLeaf->PrevLeaf = PrevLeaf; 188243791Sdim } else if (NextLeaf) { 189243791Sdim NextLeaf->PrevLeaf = 0; 190243791Sdim } 191243791Sdim } 192243791Sdim 193243791Sdim /// FullRecomputeSizeLocally - This method recomputes the 'Size' field by 194243791Sdim /// summing the size of all RopePieces. 195243791Sdim void FullRecomputeSizeLocally() { 196243791Sdim Size = 0; 197243791Sdim for (unsigned i = 0, e = getNumPieces(); i != e; ++i) 198243791Sdim Size += getPiece(i).size(); 199243791Sdim } 200243791Sdim 201243791Sdim /// split - Split the range containing the specified offset so that we are 202243791Sdim /// guaranteed that there is a place to do an insertion at the specified 203243791Sdim /// offset. The offset is relative, so "0" is the start of the node. 204243791Sdim /// 205243791Sdim /// If there is no space in this subtree for the extra piece, the extra tree 206243791Sdim /// node is returned and must be inserted into a parent. 207243791Sdim RopePieceBTreeNode *split(unsigned Offset); 208243791Sdim 209243791Sdim /// insert - Insert the specified ropepiece into this tree node at the 210243791Sdim /// specified offset. The offset is relative, so "0" is the start of the 211243791Sdim /// node. 212243791Sdim /// 213243791Sdim /// If there is no space in this subtree for the extra piece, the extra tree 214243791Sdim /// node is returned and must be inserted into a parent. 215243791Sdim RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 216243791Sdim 217243791Sdim 218243791Sdim /// erase - Remove NumBytes from this node at the specified offset. We are 219243791Sdim /// guaranteed that there is a split at Offset. 220243791Sdim void erase(unsigned Offset, unsigned NumBytes); 221243791Sdim 222243791Sdim static inline bool classof(const RopePieceBTreeNode *N) { 223243791Sdim return N->isLeaf(); 224243791Sdim } 225243791Sdim }; 226243791Sdim} // end anonymous namespace 227243791Sdim 228243791Sdim/// split - Split the range containing the specified offset so that we are 229243791Sdim/// guaranteed that there is a place to do an insertion at the specified 230243791Sdim/// offset. The offset is relative, so "0" is the start of the node. 231243791Sdim/// 232243791Sdim/// If there is no space in this subtree for the extra piece, the extra tree 233243791Sdim/// node is returned and must be inserted into a parent. 234243791SdimRopePieceBTreeNode *RopePieceBTreeLeaf::split(unsigned Offset) { 235243791Sdim // Find the insertion point. We are guaranteed that there is a split at the 236243791Sdim // specified offset so find it. 237243791Sdim if (Offset == 0 || Offset == size()) { 238243791Sdim // Fastpath for a common case. There is already a splitpoint at the end. 239243791Sdim return 0; 240243791Sdim } 241243791Sdim 242243791Sdim // Find the piece that this offset lands in. 243243791Sdim unsigned PieceOffs = 0; 244243791Sdim unsigned i = 0; 245243791Sdim while (Offset >= PieceOffs+Pieces[i].size()) { 246243791Sdim PieceOffs += Pieces[i].size(); 247243791Sdim ++i; 248243791Sdim } 249243791Sdim 250243791Sdim // If there is already a split point at the specified offset, just return 251243791Sdim // success. 252243791Sdim if (PieceOffs == Offset) 253243791Sdim return 0; 254243791Sdim 255243791Sdim // Otherwise, we need to split piece 'i' at Offset-PieceOffs. Convert Offset 256243791Sdim // to being Piece relative. 257243791Sdim unsigned IntraPieceOffset = Offset-PieceOffs; 258243791Sdim 259243791Sdim // We do this by shrinking the RopePiece and then doing an insert of the tail. 260243791Sdim RopePiece Tail(Pieces[i].StrData, Pieces[i].StartOffs+IntraPieceOffset, 261243791Sdim Pieces[i].EndOffs); 262243791Sdim Size -= Pieces[i].size(); 263243791Sdim Pieces[i].EndOffs = Pieces[i].StartOffs+IntraPieceOffset; 264243791Sdim Size += Pieces[i].size(); 265243791Sdim 266243791Sdim return insert(Offset, Tail); 267243791Sdim} 268243791Sdim 269243791Sdim 270243791Sdim/// insert - Insert the specified RopePiece into this tree node at the 271243791Sdim/// specified offset. The offset is relative, so "0" is the start of the node. 272243791Sdim/// 273243791Sdim/// If there is no space in this subtree for the extra piece, the extra tree 274243791Sdim/// node is returned and must be inserted into a parent. 275243791SdimRopePieceBTreeNode *RopePieceBTreeLeaf::insert(unsigned Offset, 276243791Sdim const RopePiece &R) { 277243791Sdim // If this node is not full, insert the piece. 278243791Sdim if (!isFull()) { 279243791Sdim // Find the insertion point. We are guaranteed that there is a split at the 280243791Sdim // specified offset so find it. 281243791Sdim unsigned i = 0, e = getNumPieces(); 282243791Sdim if (Offset == size()) { 283243791Sdim // Fastpath for a common case. 284243791Sdim i = e; 285243791Sdim } else { 286243791Sdim unsigned SlotOffs = 0; 287243791Sdim for (; Offset > SlotOffs; ++i) 288243791Sdim SlotOffs += getPiece(i).size(); 289243791Sdim assert(SlotOffs == Offset && "Split didn't occur before insertion!"); 290243791Sdim } 291243791Sdim 292243791Sdim // For an insertion into a non-full leaf node, just insert the value in 293243791Sdim // its sorted position. This requires moving later values over. 294243791Sdim for (; i != e; --e) 295243791Sdim Pieces[e] = Pieces[e-1]; 296243791Sdim Pieces[i] = R; 297243791Sdim ++NumPieces; 298243791Sdim Size += R.size(); 299243791Sdim return 0; 300243791Sdim } 301243791Sdim 302243791Sdim // Otherwise, if this is leaf is full, split it in two halves. Since this 303243791Sdim // node is full, it contains 2*WidthFactor values. We move the first 304243791Sdim // 'WidthFactor' values to the LHS child (which we leave in this node) and 305243791Sdim // move the last 'WidthFactor' values into the RHS child. 306243791Sdim 307243791Sdim // Create the new node. 308243791Sdim RopePieceBTreeLeaf *NewNode = new RopePieceBTreeLeaf(); 309243791Sdim 310243791Sdim // Move over the last 'WidthFactor' values from here to NewNode. 311243791Sdim std::copy(&Pieces[WidthFactor], &Pieces[2*WidthFactor], 312243791Sdim &NewNode->Pieces[0]); 313243791Sdim // Replace old pieces with null RopePieces to drop refcounts. 314243791Sdim std::fill(&Pieces[WidthFactor], &Pieces[2*WidthFactor], RopePiece()); 315243791Sdim 316243791Sdim // Decrease the number of values in the two nodes. 317243791Sdim NewNode->NumPieces = NumPieces = WidthFactor; 318243791Sdim 319243791Sdim // Recompute the two nodes' size. 320243791Sdim NewNode->FullRecomputeSizeLocally(); 321243791Sdim FullRecomputeSizeLocally(); 322243791Sdim 323243791Sdim // Update the list of leaves. 324243791Sdim NewNode->insertAfterLeafInOrder(this); 325243791Sdim 326243791Sdim // These insertions can't fail. 327243791Sdim if (this->size() >= Offset) 328243791Sdim this->insert(Offset, R); 329243791Sdim else 330243791Sdim NewNode->insert(Offset - this->size(), R); 331243791Sdim return NewNode; 332243791Sdim} 333243791Sdim 334243791Sdim/// erase - Remove NumBytes from this node at the specified offset. We are 335243791Sdim/// guaranteed that there is a split at Offset. 336243791Sdimvoid RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) { 337243791Sdim // Since we are guaranteed that there is a split at Offset, we start by 338243791Sdim // finding the Piece that starts there. 339243791Sdim unsigned PieceOffs = 0; 340243791Sdim unsigned i = 0; 341243791Sdim for (; Offset > PieceOffs; ++i) 342243791Sdim PieceOffs += getPiece(i).size(); 343243791Sdim assert(PieceOffs == Offset && "Split didn't occur before erase!"); 344243791Sdim 345243791Sdim unsigned StartPiece = i; 346243791Sdim 347243791Sdim // Figure out how many pieces completely cover 'NumBytes'. We want to remove 348243791Sdim // all of them. 349243791Sdim for (; Offset+NumBytes > PieceOffs+getPiece(i).size(); ++i) 350243791Sdim PieceOffs += getPiece(i).size(); 351243791Sdim 352243791Sdim // If we exactly include the last one, include it in the region to delete. 353243791Sdim if (Offset+NumBytes == PieceOffs+getPiece(i).size()) 354243791Sdim PieceOffs += getPiece(i).size(), ++i; 355243791Sdim 356243791Sdim // If we completely cover some RopePieces, erase them now. 357243791Sdim if (i != StartPiece) { 358243791Sdim unsigned NumDeleted = i-StartPiece; 359243791Sdim for (; i != getNumPieces(); ++i) 360243791Sdim Pieces[i-NumDeleted] = Pieces[i]; 361243791Sdim 362243791Sdim // Drop references to dead rope pieces. 363243791Sdim std::fill(&Pieces[getNumPieces()-NumDeleted], &Pieces[getNumPieces()], 364243791Sdim RopePiece()); 365243791Sdim NumPieces -= NumDeleted; 366243791Sdim 367243791Sdim unsigned CoverBytes = PieceOffs-Offset; 368243791Sdim NumBytes -= CoverBytes; 369243791Sdim Size -= CoverBytes; 370243791Sdim } 371243791Sdim 372243791Sdim // If we completely removed some stuff, we could be done. 373243791Sdim if (NumBytes == 0) return; 374243791Sdim 375243791Sdim // Okay, now might be erasing part of some Piece. If this is the case, then 376243791Sdim // move the start point of the piece. 377243791Sdim assert(getPiece(StartPiece).size() > NumBytes); 378243791Sdim Pieces[StartPiece].StartOffs += NumBytes; 379243791Sdim 380243791Sdim // The size of this node just shrunk by NumBytes. 381243791Sdim Size -= NumBytes; 382243791Sdim} 383243791Sdim 384243791Sdim//===----------------------------------------------------------------------===// 385243791Sdim// RopePieceBTreeInterior Class 386243791Sdim//===----------------------------------------------------------------------===// 387243791Sdim 388243791Sdimnamespace { 389243791Sdim /// RopePieceBTreeInterior - This represents an interior node in the B+Tree, 390243791Sdim /// which holds up to 2*WidthFactor pointers to child nodes. 391243791Sdim class RopePieceBTreeInterior : public RopePieceBTreeNode { 392243791Sdim /// NumChildren - This holds the number of children currently active in the 393243791Sdim /// Children array. 394243791Sdim unsigned char NumChildren; 395243791Sdim RopePieceBTreeNode *Children[2*WidthFactor]; 396243791Sdim public: 397243791Sdim RopePieceBTreeInterior() : RopePieceBTreeNode(false), NumChildren(0) {} 398243791Sdim 399243791Sdim RopePieceBTreeInterior(RopePieceBTreeNode *LHS, RopePieceBTreeNode *RHS) 400243791Sdim : RopePieceBTreeNode(false) { 401243791Sdim Children[0] = LHS; 402243791Sdim Children[1] = RHS; 403243791Sdim NumChildren = 2; 404243791Sdim Size = LHS->size() + RHS->size(); 405243791Sdim } 406243791Sdim 407243791Sdim ~RopePieceBTreeInterior() { 408243791Sdim for (unsigned i = 0, e = getNumChildren(); i != e; ++i) 409243791Sdim Children[i]->Destroy(); 410243791Sdim } 411243791Sdim 412243791Sdim bool isFull() const { return NumChildren == 2*WidthFactor; } 413243791Sdim 414243791Sdim unsigned getNumChildren() const { return NumChildren; } 415243791Sdim const RopePieceBTreeNode *getChild(unsigned i) const { 416243791Sdim assert(i < NumChildren && "invalid child #"); 417243791Sdim return Children[i]; 418243791Sdim } 419243791Sdim RopePieceBTreeNode *getChild(unsigned i) { 420243791Sdim assert(i < NumChildren && "invalid child #"); 421243791Sdim return Children[i]; 422243791Sdim } 423243791Sdim 424243791Sdim /// FullRecomputeSizeLocally - Recompute the Size field of this node by 425243791Sdim /// summing up the sizes of the child nodes. 426243791Sdim void FullRecomputeSizeLocally() { 427243791Sdim Size = 0; 428243791Sdim for (unsigned i = 0, e = getNumChildren(); i != e; ++i) 429243791Sdim Size += getChild(i)->size(); 430243791Sdim } 431243791Sdim 432243791Sdim 433243791Sdim /// split - Split the range containing the specified offset so that we are 434243791Sdim /// guaranteed that there is a place to do an insertion at the specified 435243791Sdim /// offset. The offset is relative, so "0" is the start of the node. 436243791Sdim /// 437243791Sdim /// If there is no space in this subtree for the extra piece, the extra tree 438243791Sdim /// node is returned and must be inserted into a parent. 439243791Sdim RopePieceBTreeNode *split(unsigned Offset); 440243791Sdim 441243791Sdim 442243791Sdim /// insert - Insert the specified ropepiece into this tree node at the 443243791Sdim /// specified offset. The offset is relative, so "0" is the start of the 444243791Sdim /// node. 445243791Sdim /// 446243791Sdim /// If there is no space in this subtree for the extra piece, the extra tree 447243791Sdim /// node is returned and must be inserted into a parent. 448243791Sdim RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 449243791Sdim 450243791Sdim /// HandleChildPiece - A child propagated an insertion result up to us. 451243791Sdim /// Insert the new child, and/or propagate the result further up the tree. 452243791Sdim RopePieceBTreeNode *HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS); 453243791Sdim 454243791Sdim /// erase - Remove NumBytes from this node at the specified offset. We are 455243791Sdim /// guaranteed that there is a split at Offset. 456243791Sdim void erase(unsigned Offset, unsigned NumBytes); 457243791Sdim 458243791Sdim static inline bool classof(const RopePieceBTreeNode *N) { 459243791Sdim return !N->isLeaf(); 460243791Sdim } 461243791Sdim }; 462243791Sdim} // end anonymous namespace 463243791Sdim 464243791Sdim/// split - Split the range containing the specified offset so that we are 465243791Sdim/// guaranteed that there is a place to do an insertion at the specified 466243791Sdim/// offset. The offset is relative, so "0" is the start of the node. 467243791Sdim/// 468243791Sdim/// If there is no space in this subtree for the extra piece, the extra tree 469243791Sdim/// node is returned and must be inserted into a parent. 470243791SdimRopePieceBTreeNode *RopePieceBTreeInterior::split(unsigned Offset) { 471243791Sdim // Figure out which child to split. 472243791Sdim if (Offset == 0 || Offset == size()) 473243791Sdim return 0; // If we have an exact offset, we're already split. 474243791Sdim 475243791Sdim unsigned ChildOffset = 0; 476243791Sdim unsigned i = 0; 477243791Sdim for (; Offset >= ChildOffset+getChild(i)->size(); ++i) 478243791Sdim ChildOffset += getChild(i)->size(); 479243791Sdim 480243791Sdim // If already split there, we're done. 481243791Sdim if (ChildOffset == Offset) 482243791Sdim return 0; 483243791Sdim 484243791Sdim // Otherwise, recursively split the child. 485243791Sdim if (RopePieceBTreeNode *RHS = getChild(i)->split(Offset-ChildOffset)) 486243791Sdim return HandleChildPiece(i, RHS); 487243791Sdim return 0; // Done! 488243791Sdim} 489243791Sdim 490243791Sdim/// insert - Insert the specified ropepiece into this tree node at the 491243791Sdim/// specified offset. The offset is relative, so "0" is the start of the 492243791Sdim/// node. 493243791Sdim/// 494243791Sdim/// If there is no space in this subtree for the extra piece, the extra tree 495243791Sdim/// node is returned and must be inserted into a parent. 496243791SdimRopePieceBTreeNode *RopePieceBTreeInterior::insert(unsigned Offset, 497243791Sdim const RopePiece &R) { 498243791Sdim // Find the insertion point. We are guaranteed that there is a split at the 499243791Sdim // specified offset so find it. 500243791Sdim unsigned i = 0, e = getNumChildren(); 501243791Sdim 502243791Sdim unsigned ChildOffs = 0; 503243791Sdim if (Offset == size()) { 504243791Sdim // Fastpath for a common case. Insert at end of last child. 505243791Sdim i = e-1; 506243791Sdim ChildOffs = size()-getChild(i)->size(); 507243791Sdim } else { 508243791Sdim for (; Offset > ChildOffs+getChild(i)->size(); ++i) 509243791Sdim ChildOffs += getChild(i)->size(); 510243791Sdim } 511243791Sdim 512243791Sdim Size += R.size(); 513243791Sdim 514243791Sdim // Insert at the end of this child. 515243791Sdim if (RopePieceBTreeNode *RHS = getChild(i)->insert(Offset-ChildOffs, R)) 516243791Sdim return HandleChildPiece(i, RHS); 517243791Sdim 518243791Sdim return 0; 519243791Sdim} 520243791Sdim 521243791Sdim/// HandleChildPiece - A child propagated an insertion result up to us. 522243791Sdim/// Insert the new child, and/or propagate the result further up the tree. 523243791SdimRopePieceBTreeNode * 524243791SdimRopePieceBTreeInterior::HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS) { 525243791Sdim // Otherwise the child propagated a subtree up to us as a new child. See if 526243791Sdim // we have space for it here. 527243791Sdim if (!isFull()) { 528243791Sdim // Insert RHS after child 'i'. 529243791Sdim if (i + 1 != getNumChildren()) 530243791Sdim memmove(&Children[i+2], &Children[i+1], 531243791Sdim (getNumChildren()-i-1)*sizeof(Children[0])); 532243791Sdim Children[i+1] = RHS; 533243791Sdim ++NumChildren; 534243791Sdim return 0; 535243791Sdim } 536243791Sdim 537243791Sdim // Okay, this node is full. Split it in half, moving WidthFactor children to 538243791Sdim // a newly allocated interior node. 539243791Sdim 540243791Sdim // Create the new node. 541243791Sdim RopePieceBTreeInterior *NewNode = new RopePieceBTreeInterior(); 542243791Sdim 543243791Sdim // Move over the last 'WidthFactor' values from here to NewNode. 544243791Sdim memcpy(&NewNode->Children[0], &Children[WidthFactor], 545243791Sdim WidthFactor*sizeof(Children[0])); 546243791Sdim 547243791Sdim // Decrease the number of values in the two nodes. 548243791Sdim NewNode->NumChildren = NumChildren = WidthFactor; 549243791Sdim 550243791Sdim // Finally, insert the two new children in the side the can (now) hold them. 551243791Sdim // These insertions can't fail. 552243791Sdim if (i < WidthFactor) 553243791Sdim this->HandleChildPiece(i, RHS); 554243791Sdim else 555243791Sdim NewNode->HandleChildPiece(i-WidthFactor, RHS); 556243791Sdim 557243791Sdim // Recompute the two nodes' size. 558243791Sdim NewNode->FullRecomputeSizeLocally(); 559243791Sdim FullRecomputeSizeLocally(); 560243791Sdim return NewNode; 561243791Sdim} 562243791Sdim 563243791Sdim/// erase - Remove NumBytes from this node at the specified offset. We are 564243791Sdim/// guaranteed that there is a split at Offset. 565243791Sdimvoid RopePieceBTreeInterior::erase(unsigned Offset, unsigned NumBytes) { 566243791Sdim // This will shrink this node by NumBytes. 567243791Sdim Size -= NumBytes; 568243791Sdim 569243791Sdim // Find the first child that overlaps with Offset. 570243791Sdim unsigned i = 0; 571243791Sdim for (; Offset >= getChild(i)->size(); ++i) 572243791Sdim Offset -= getChild(i)->size(); 573243791Sdim 574243791Sdim // Propagate the delete request into overlapping children, or completely 575243791Sdim // delete the children as appropriate. 576243791Sdim while (NumBytes) { 577243791Sdim RopePieceBTreeNode *CurChild = getChild(i); 578243791Sdim 579243791Sdim // If we are deleting something contained entirely in the child, pass on the 580243791Sdim // request. 581243791Sdim if (Offset+NumBytes < CurChild->size()) { 582243791Sdim CurChild->erase(Offset, NumBytes); 583243791Sdim return; 584243791Sdim } 585243791Sdim 586243791Sdim // If this deletion request starts somewhere in the middle of the child, it 587243791Sdim // must be deleting to the end of the child. 588243791Sdim if (Offset) { 589243791Sdim unsigned BytesFromChild = CurChild->size()-Offset; 590243791Sdim CurChild->erase(Offset, BytesFromChild); 591243791Sdim NumBytes -= BytesFromChild; 592243791Sdim // Start at the beginning of the next child. 593243791Sdim Offset = 0; 594243791Sdim ++i; 595243791Sdim continue; 596243791Sdim } 597243791Sdim 598243791Sdim // If the deletion request completely covers the child, delete it and move 599243791Sdim // the rest down. 600243791Sdim NumBytes -= CurChild->size(); 601243791Sdim CurChild->Destroy(); 602243791Sdim --NumChildren; 603243791Sdim if (i != getNumChildren()) 604243791Sdim memmove(&Children[i], &Children[i+1], 605243791Sdim (getNumChildren()-i)*sizeof(Children[0])); 606243791Sdim } 607243791Sdim} 608243791Sdim 609243791Sdim//===----------------------------------------------------------------------===// 610243791Sdim// RopePieceBTreeNode Implementation 611243791Sdim//===----------------------------------------------------------------------===// 612243791Sdim 613243791Sdimvoid RopePieceBTreeNode::Destroy() { 614243791Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 615243791Sdim delete Leaf; 616243791Sdim else 617243791Sdim delete cast<RopePieceBTreeInterior>(this); 618243791Sdim} 619243791Sdim 620243791Sdim/// split - Split the range containing the specified offset so that we are 621243791Sdim/// guaranteed that there is a place to do an insertion at the specified 622243791Sdim/// offset. The offset is relative, so "0" is the start of the node. 623243791Sdim/// 624243791Sdim/// If there is no space in this subtree for the extra piece, the extra tree 625243791Sdim/// node is returned and must be inserted into a parent. 626243791SdimRopePieceBTreeNode *RopePieceBTreeNode::split(unsigned Offset) { 627243791Sdim assert(Offset <= size() && "Invalid offset to split!"); 628243791Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 629243791Sdim return Leaf->split(Offset); 630243791Sdim return cast<RopePieceBTreeInterior>(this)->split(Offset); 631243791Sdim} 632243791Sdim 633243791Sdim/// insert - Insert the specified ropepiece into this tree node at the 634243791Sdim/// specified offset. The offset is relative, so "0" is the start of the 635243791Sdim/// node. 636243791Sdim/// 637243791Sdim/// If there is no space in this subtree for the extra piece, the extra tree 638243791Sdim/// node is returned and must be inserted into a parent. 639243791SdimRopePieceBTreeNode *RopePieceBTreeNode::insert(unsigned Offset, 640243791Sdim const RopePiece &R) { 641243791Sdim assert(Offset <= size() && "Invalid offset to insert!"); 642243791Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 643243791Sdim return Leaf->insert(Offset, R); 644243791Sdim return cast<RopePieceBTreeInterior>(this)->insert(Offset, R); 645243791Sdim} 646243791Sdim 647243791Sdim/// erase - Remove NumBytes from this node at the specified offset. We are 648243791Sdim/// guaranteed that there is a split at Offset. 649243791Sdimvoid RopePieceBTreeNode::erase(unsigned Offset, unsigned NumBytes) { 650243791Sdim assert(Offset+NumBytes <= size() && "Invalid offset to erase!"); 651243791Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 652243791Sdim return Leaf->erase(Offset, NumBytes); 653243791Sdim return cast<RopePieceBTreeInterior>(this)->erase(Offset, NumBytes); 654243791Sdim} 655243791Sdim 656243791Sdim 657243791Sdim//===----------------------------------------------------------------------===// 658243791Sdim// RopePieceBTreeIterator Implementation 659243791Sdim//===----------------------------------------------------------------------===// 660243791Sdim 661243791Sdimstatic const RopePieceBTreeLeaf *getCN(const void *P) { 662243791Sdim return static_cast<const RopePieceBTreeLeaf*>(P); 663243791Sdim} 664243791Sdim 665243791Sdim// begin iterator. 666243791SdimRopePieceBTreeIterator::RopePieceBTreeIterator(const void *n) { 667243791Sdim const RopePieceBTreeNode *N = static_cast<const RopePieceBTreeNode*>(n); 668243791Sdim 669243791Sdim // Walk down the left side of the tree until we get to a leaf. 670243791Sdim while (const RopePieceBTreeInterior *IN = dyn_cast<RopePieceBTreeInterior>(N)) 671243791Sdim N = IN->getChild(0); 672243791Sdim 673243791Sdim // We must have at least one leaf. 674243791Sdim CurNode = cast<RopePieceBTreeLeaf>(N); 675243791Sdim 676243791Sdim // If we found a leaf that happens to be empty, skip over it until we get 677243791Sdim // to something full. 678243791Sdim while (CurNode && getCN(CurNode)->getNumPieces() == 0) 679243791Sdim CurNode = getCN(CurNode)->getNextLeafInOrder(); 680243791Sdim 681243791Sdim if (CurNode != 0) 682243791Sdim CurPiece = &getCN(CurNode)->getPiece(0); 683243791Sdim else // Empty tree, this is an end() iterator. 684243791Sdim CurPiece = 0; 685243791Sdim CurChar = 0; 686243791Sdim} 687243791Sdim 688243791Sdimvoid RopePieceBTreeIterator::MoveToNextPiece() { 689243791Sdim if (CurPiece != &getCN(CurNode)->getPiece(getCN(CurNode)->getNumPieces()-1)) { 690243791Sdim CurChar = 0; 691243791Sdim ++CurPiece; 692243791Sdim return; 693243791Sdim } 694243791Sdim 695243791Sdim // Find the next non-empty leaf node. 696243791Sdim do 697243791Sdim CurNode = getCN(CurNode)->getNextLeafInOrder(); 698243791Sdim while (CurNode && getCN(CurNode)->getNumPieces() == 0); 699243791Sdim 700243791Sdim if (CurNode != 0) 701243791Sdim CurPiece = &getCN(CurNode)->getPiece(0); 702243791Sdim else // Hit end(). 703243791Sdim CurPiece = 0; 704243791Sdim CurChar = 0; 705243791Sdim} 706243791Sdim 707243791Sdim//===----------------------------------------------------------------------===// 708243791Sdim// RopePieceBTree Implementation 709243791Sdim//===----------------------------------------------------------------------===// 710243791Sdim 711243791Sdimstatic RopePieceBTreeNode *getRoot(void *P) { 712243791Sdim return static_cast<RopePieceBTreeNode*>(P); 713243791Sdim} 714243791Sdim 715243791SdimRopePieceBTree::RopePieceBTree() { 716243791Sdim Root = new RopePieceBTreeLeaf(); 717243791Sdim} 718243791SdimRopePieceBTree::RopePieceBTree(const RopePieceBTree &RHS) { 719243791Sdim assert(RHS.empty() && "Can't copy non-empty tree yet"); 720243791Sdim Root = new RopePieceBTreeLeaf(); 721243791Sdim} 722243791SdimRopePieceBTree::~RopePieceBTree() { 723243791Sdim getRoot(Root)->Destroy(); 724243791Sdim} 725243791Sdim 726243791Sdimunsigned RopePieceBTree::size() const { 727243791Sdim return getRoot(Root)->size(); 728243791Sdim} 729243791Sdim 730243791Sdimvoid RopePieceBTree::clear() { 731243791Sdim if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(getRoot(Root))) 732243791Sdim Leaf->clear(); 733243791Sdim else { 734243791Sdim getRoot(Root)->Destroy(); 735243791Sdim Root = new RopePieceBTreeLeaf(); 736243791Sdim } 737243791Sdim} 738243791Sdim 739243791Sdimvoid RopePieceBTree::insert(unsigned Offset, const RopePiece &R) { 740243791Sdim // #1. Split at Offset. 741243791Sdim if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset)) 742243791Sdim Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 743243791Sdim 744243791Sdim // #2. Do the insertion. 745243791Sdim if (RopePieceBTreeNode *RHS = getRoot(Root)->insert(Offset, R)) 746243791Sdim Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 747243791Sdim} 748243791Sdim 749243791Sdimvoid RopePieceBTree::erase(unsigned Offset, unsigned NumBytes) { 750243791Sdim // #1. Split at Offset. 751243791Sdim if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset)) 752243791Sdim Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 753243791Sdim 754243791Sdim // #2. Do the erasing. 755243791Sdim getRoot(Root)->erase(Offset, NumBytes); 756243791Sdim} 757243791Sdim 758243791Sdim//===----------------------------------------------------------------------===// 759243791Sdim// RewriteRope Implementation 760243791Sdim//===----------------------------------------------------------------------===// 761243791Sdim 762243791Sdim/// MakeRopeString - This copies the specified byte range into some instance of 763243791Sdim/// RopeRefCountString, and return a RopePiece that represents it. This uses 764243791Sdim/// the AllocBuffer object to aggregate requests for small strings into one 765243791Sdim/// allocation instead of doing tons of tiny allocations. 766243791SdimRopePiece RewriteRope::MakeRopeString(const char *Start, const char *End) { 767243791Sdim unsigned Len = End-Start; 768243791Sdim assert(Len && "Zero length RopePiece is invalid!"); 769243791Sdim 770243791Sdim // If we have space for this string in the current alloc buffer, use it. 771243791Sdim if (AllocOffs+Len <= AllocChunkSize) { 772243791Sdim memcpy(AllocBuffer->Data+AllocOffs, Start, Len); 773243791Sdim AllocOffs += Len; 774243791Sdim return RopePiece(AllocBuffer, AllocOffs-Len, AllocOffs); 775243791Sdim } 776243791Sdim 777243791Sdim // If we don't have enough room because this specific allocation is huge, 778243791Sdim // just allocate a new rope piece for it alone. 779243791Sdim if (Len > AllocChunkSize) { 780243791Sdim unsigned Size = End-Start+sizeof(RopeRefCountString)-1; 781243791Sdim RopeRefCountString *Res = 782243791Sdim reinterpret_cast<RopeRefCountString *>(new char[Size]); 783243791Sdim Res->RefCount = 0; 784243791Sdim memcpy(Res->Data, Start, End-Start); 785243791Sdim return RopePiece(Res, 0, End-Start); 786243791Sdim } 787243791Sdim 788243791Sdim // Otherwise, this was a small request but we just don't have space for it 789243791Sdim // Make a new chunk and share it with later allocations. 790243791Sdim 791243791Sdim // If we had an old allocation, drop our reference to it. 792243791Sdim if (AllocBuffer && --AllocBuffer->RefCount == 0) 793243791Sdim delete [] (char*)AllocBuffer; 794243791Sdim 795243791Sdim unsigned AllocSize = offsetof(RopeRefCountString, Data) + AllocChunkSize; 796243791Sdim AllocBuffer = reinterpret_cast<RopeRefCountString *>(new char[AllocSize]); 797243791Sdim AllocBuffer->RefCount = 0; 798243791Sdim memcpy(AllocBuffer->Data, Start, Len); 799243791Sdim AllocOffs = Len; 800243791Sdim 801243791Sdim // Start out the new allocation with a refcount of 1, since we have an 802243791Sdim // internal reference to it. 803243791Sdim AllocBuffer->addRef(); 804243791Sdim return RopePiece(AllocBuffer, 0, Len); 805243791Sdim} 806243791Sdim 807243791Sdim 808