clang API Documentation
00001 //===--- RewriteRope.cpp - Rope specialized for rewriter --------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file implements the RewriteRope class, which is a powerful string. 00011 // 00012 //===----------------------------------------------------------------------===// 00013 00014 #include "clang/Rewrite/RewriteRope.h" 00015 #include "clang/Basic/LLVM.h" 00016 #include <algorithm> 00017 using namespace clang; 00018 00019 /// RewriteRope is a "strong" string class, designed to make insertions and 00020 /// deletions in the middle of the string nearly constant time (really, they are 00021 /// O(log N), but with a very low constant factor). 00022 /// 00023 /// The implementation of this datastructure is a conceptual linear sequence of 00024 /// RopePiece elements. Each RopePiece represents a view on a separately 00025 /// allocated and reference counted string. This means that splitting a very 00026 /// long string can be done in constant time by splitting a RopePiece that 00027 /// references the whole string into two rope pieces that reference each half. 00028 /// Once split, another string can be inserted in between the two halves by 00029 /// inserting a RopePiece in between the two others. All of this is very 00030 /// inexpensive: it takes time proportional to the number of RopePieces, not the 00031 /// length of the strings they represent. 00032 /// 00033 /// While a linear sequences of RopePieces is the conceptual model, the actual 00034 /// implementation captures them in an adapted B+ Tree. Using a B+ tree (which 00035 /// is a tree that keeps the values in the leaves and has where each node 00036 /// contains a reasonable number of pointers to children/values) allows us to 00037 /// maintain efficient operation when the RewriteRope contains a *huge* number 00038 /// of RopePieces. The basic idea of the B+ Tree is that it allows us to find 00039 /// the RopePiece corresponding to some offset very efficiently, and it 00040 /// automatically balances itself on insertions of RopePieces (which can happen 00041 /// for both insertions and erases of string ranges). 00042 /// 00043 /// The one wrinkle on the theory is that we don't attempt to keep the tree 00044 /// properly balanced when erases happen. Erases of string data can both insert 00045 /// new RopePieces (e.g. when the middle of some other rope piece is deleted, 00046 /// which results in two rope pieces, which is just like an insert) or it can 00047 /// reduce the number of RopePieces maintained by the B+Tree. In the case when 00048 /// the number of RopePieces is reduced, we don't attempt to maintain the 00049 /// standard 'invariant' that each node in the tree contains at least 00050 /// 'WidthFactor' children/values. For our use cases, this doesn't seem to 00051 /// matter. 00052 /// 00053 /// The implementation below is primarily implemented in terms of three classes: 00054 /// RopePieceBTreeNode - Common base class for: 00055 /// 00056 /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece 00057 /// nodes. This directly represents a chunk of the string with those 00058 /// RopePieces contatenated. 00059 /// RopePieceBTreeInterior - An interior node in the B+ Tree, which manages 00060 /// up to '2*WidthFactor' other nodes in the tree. 00061 00062 00063 //===----------------------------------------------------------------------===// 00064 // RopePieceBTreeNode Class 00065 //===----------------------------------------------------------------------===// 00066 00067 namespace { 00068 /// RopePieceBTreeNode - Common base class of RopePieceBTreeLeaf and 00069 /// RopePieceBTreeInterior. This provides some 'virtual' dispatching methods 00070 /// and a flag that determines which subclass the instance is. Also 00071 /// important, this node knows the full extend of the node, including any 00072 /// children that it has. This allows efficient skipping over entire subtrees 00073 /// when looking for an offset in the BTree. 00074 class RopePieceBTreeNode { 00075 protected: 00076 /// WidthFactor - This controls the number of K/V slots held in the BTree: 00077 /// how wide it is. Each level of the BTree is guaranteed to have at least 00078 /// 'WidthFactor' elements in it (either ropepieces or children), (except 00079 /// the root, which may have less) and may have at most 2*WidthFactor 00080 /// elements. 00081 enum { WidthFactor = 8 }; 00082 00083 /// Size - This is the number of bytes of file this node (including any 00084 /// potential children) covers. 00085 unsigned Size; 00086 00087 /// IsLeaf - True if this is an instance of RopePieceBTreeLeaf, false if it 00088 /// is an instance of RopePieceBTreeInterior. 00089 bool IsLeaf; 00090 00091 RopePieceBTreeNode(bool isLeaf) : Size(0), IsLeaf(isLeaf) {} 00092 ~RopePieceBTreeNode() {} 00093 public: 00094 00095 bool isLeaf() const { return IsLeaf; } 00096 unsigned size() const { return Size; } 00097 00098 void Destroy(); 00099 00100 /// split - Split the range containing the specified offset so that we are 00101 /// guaranteed that there is a place to do an insertion at the specified 00102 /// offset. The offset is relative, so "0" is the start of the node. 00103 /// 00104 /// If there is no space in this subtree for the extra piece, the extra tree 00105 /// node is returned and must be inserted into a parent. 00106 RopePieceBTreeNode *split(unsigned Offset); 00107 00108 /// insert - Insert the specified ropepiece into this tree node at the 00109 /// specified offset. The offset is relative, so "0" is the start of the 00110 /// node. 00111 /// 00112 /// If there is no space in this subtree for the extra piece, the extra tree 00113 /// node is returned and must be inserted into a parent. 00114 RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 00115 00116 /// erase - Remove NumBytes from this node at the specified offset. We are 00117 /// guaranteed that there is a split at Offset. 00118 void erase(unsigned Offset, unsigned NumBytes); 00119 00120 //static inline bool classof(const RopePieceBTreeNode *) { return true; } 00121 00122 }; 00123 } // end anonymous namespace 00124 00125 //===----------------------------------------------------------------------===// 00126 // RopePieceBTreeLeaf Class 00127 //===----------------------------------------------------------------------===// 00128 00129 namespace { 00130 /// RopePieceBTreeLeaf - Directly manages up to '2*WidthFactor' RopePiece 00131 /// nodes. This directly represents a chunk of the string with those 00132 /// RopePieces contatenated. Since this is a B+Tree, all values (in this case 00133 /// instances of RopePiece) are stored in leaves like this. To make iteration 00134 /// over the leaves efficient, they maintain a singly linked list through the 00135 /// NextLeaf field. This allows the B+Tree forward iterator to be constant 00136 /// time for all increments. 00137 class RopePieceBTreeLeaf : public RopePieceBTreeNode { 00138 /// NumPieces - This holds the number of rope pieces currently active in the 00139 /// Pieces array. 00140 unsigned char NumPieces; 00141 00142 /// Pieces - This tracks the file chunks currently in this leaf. 00143 /// 00144 RopePiece Pieces[2*WidthFactor]; 00145 00146 /// NextLeaf - This is a pointer to the next leaf in the tree, allowing 00147 /// efficient in-order forward iteration of the tree without traversal. 00148 RopePieceBTreeLeaf **PrevLeaf, *NextLeaf; 00149 public: 00150 RopePieceBTreeLeaf() : RopePieceBTreeNode(true), NumPieces(0), 00151 PrevLeaf(0), NextLeaf(0) {} 00152 ~RopePieceBTreeLeaf() { 00153 if (PrevLeaf || NextLeaf) 00154 removeFromLeafInOrder(); 00155 clear(); 00156 } 00157 00158 bool isFull() const { return NumPieces == 2*WidthFactor; } 00159 00160 /// clear - Remove all rope pieces from this leaf. 00161 void clear() { 00162 while (NumPieces) 00163 Pieces[--NumPieces] = RopePiece(); 00164 Size = 0; 00165 } 00166 00167 unsigned getNumPieces() const { return NumPieces; } 00168 00169 const RopePiece &getPiece(unsigned i) const { 00170 assert(i < getNumPieces() && "Invalid piece ID"); 00171 return Pieces[i]; 00172 } 00173 00174 const RopePieceBTreeLeaf *getNextLeafInOrder() const { return NextLeaf; } 00175 void insertAfterLeafInOrder(RopePieceBTreeLeaf *Node) { 00176 assert(PrevLeaf == 0 && NextLeaf == 0 && "Already in ordering"); 00177 00178 NextLeaf = Node->NextLeaf; 00179 if (NextLeaf) 00180 NextLeaf->PrevLeaf = &NextLeaf; 00181 PrevLeaf = &Node->NextLeaf; 00182 Node->NextLeaf = this; 00183 } 00184 00185 void removeFromLeafInOrder() { 00186 if (PrevLeaf) { 00187 *PrevLeaf = NextLeaf; 00188 if (NextLeaf) 00189 NextLeaf->PrevLeaf = PrevLeaf; 00190 } else if (NextLeaf) { 00191 NextLeaf->PrevLeaf = 0; 00192 } 00193 } 00194 00195 /// FullRecomputeSizeLocally - This method recomputes the 'Size' field by 00196 /// summing the size of all RopePieces. 00197 void FullRecomputeSizeLocally() { 00198 Size = 0; 00199 for (unsigned i = 0, e = getNumPieces(); i != e; ++i) 00200 Size += getPiece(i).size(); 00201 } 00202 00203 /// split - Split the range containing the specified offset so that we are 00204 /// guaranteed that there is a place to do an insertion at the specified 00205 /// offset. The offset is relative, so "0" is the start of the node. 00206 /// 00207 /// If there is no space in this subtree for the extra piece, the extra tree 00208 /// node is returned and must be inserted into a parent. 00209 RopePieceBTreeNode *split(unsigned Offset); 00210 00211 /// insert - Insert the specified ropepiece into this tree node at the 00212 /// specified offset. The offset is relative, so "0" is the start of the 00213 /// node. 00214 /// 00215 /// If there is no space in this subtree for the extra piece, the extra tree 00216 /// node is returned and must be inserted into a parent. 00217 RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 00218 00219 00220 /// erase - Remove NumBytes from this node at the specified offset. We are 00221 /// guaranteed that there is a split at Offset. 00222 void erase(unsigned Offset, unsigned NumBytes); 00223 00224 //static inline bool classof(const RopePieceBTreeLeaf *) { return true; } 00225 static inline bool classof(const RopePieceBTreeNode *N) { 00226 return N->isLeaf(); 00227 } 00228 }; 00229 } // end anonymous namespace 00230 00231 /// split - Split the range containing the specified offset so that we are 00232 /// guaranteed that there is a place to do an insertion at the specified 00233 /// offset. The offset is relative, so "0" is the start of the node. 00234 /// 00235 /// If there is no space in this subtree for the extra piece, the extra tree 00236 /// node is returned and must be inserted into a parent. 00237 RopePieceBTreeNode *RopePieceBTreeLeaf::split(unsigned Offset) { 00238 // Find the insertion point. We are guaranteed that there is a split at the 00239 // specified offset so find it. 00240 if (Offset == 0 || Offset == size()) { 00241 // Fastpath for a common case. There is already a splitpoint at the end. 00242 return 0; 00243 } 00244 00245 // Find the piece that this offset lands in. 00246 unsigned PieceOffs = 0; 00247 unsigned i = 0; 00248 while (Offset >= PieceOffs+Pieces[i].size()) { 00249 PieceOffs += Pieces[i].size(); 00250 ++i; 00251 } 00252 00253 // If there is already a split point at the specified offset, just return 00254 // success. 00255 if (PieceOffs == Offset) 00256 return 0; 00257 00258 // Otherwise, we need to split piece 'i' at Offset-PieceOffs. Convert Offset 00259 // to being Piece relative. 00260 unsigned IntraPieceOffset = Offset-PieceOffs; 00261 00262 // We do this by shrinking the RopePiece and then doing an insert of the tail. 00263 RopePiece Tail(Pieces[i].StrData, Pieces[i].StartOffs+IntraPieceOffset, 00264 Pieces[i].EndOffs); 00265 Size -= Pieces[i].size(); 00266 Pieces[i].EndOffs = Pieces[i].StartOffs+IntraPieceOffset; 00267 Size += Pieces[i].size(); 00268 00269 return insert(Offset, Tail); 00270 } 00271 00272 00273 /// insert - Insert the specified RopePiece into this tree node at the 00274 /// specified offset. The offset is relative, so "0" is the start of the node. 00275 /// 00276 /// If there is no space in this subtree for the extra piece, the extra tree 00277 /// node is returned and must be inserted into a parent. 00278 RopePieceBTreeNode *RopePieceBTreeLeaf::insert(unsigned Offset, 00279 const RopePiece &R) { 00280 // If this node is not full, insert the piece. 00281 if (!isFull()) { 00282 // Find the insertion point. We are guaranteed that there is a split at the 00283 // specified offset so find it. 00284 unsigned i = 0, e = getNumPieces(); 00285 if (Offset == size()) { 00286 // Fastpath for a common case. 00287 i = e; 00288 } else { 00289 unsigned SlotOffs = 0; 00290 for (; Offset > SlotOffs; ++i) 00291 SlotOffs += getPiece(i).size(); 00292 assert(SlotOffs == Offset && "Split didn't occur before insertion!"); 00293 } 00294 00295 // For an insertion into a non-full leaf node, just insert the value in 00296 // its sorted position. This requires moving later values over. 00297 for (; i != e; --e) 00298 Pieces[e] = Pieces[e-1]; 00299 Pieces[i] = R; 00300 ++NumPieces; 00301 Size += R.size(); 00302 return 0; 00303 } 00304 00305 // Otherwise, if this is leaf is full, split it in two halves. Since this 00306 // node is full, it contains 2*WidthFactor values. We move the first 00307 // 'WidthFactor' values to the LHS child (which we leave in this node) and 00308 // move the last 'WidthFactor' values into the RHS child. 00309 00310 // Create the new node. 00311 RopePieceBTreeLeaf *NewNode = new RopePieceBTreeLeaf(); 00312 00313 // Move over the last 'WidthFactor' values from here to NewNode. 00314 std::copy(&Pieces[WidthFactor], &Pieces[2*WidthFactor], 00315 &NewNode->Pieces[0]); 00316 // Replace old pieces with null RopePieces to drop refcounts. 00317 std::fill(&Pieces[WidthFactor], &Pieces[2*WidthFactor], RopePiece()); 00318 00319 // Decrease the number of values in the two nodes. 00320 NewNode->NumPieces = NumPieces = WidthFactor; 00321 00322 // Recompute the two nodes' size. 00323 NewNode->FullRecomputeSizeLocally(); 00324 FullRecomputeSizeLocally(); 00325 00326 // Update the list of leaves. 00327 NewNode->insertAfterLeafInOrder(this); 00328 00329 // These insertions can't fail. 00330 if (this->size() >= Offset) 00331 this->insert(Offset, R); 00332 else 00333 NewNode->insert(Offset - this->size(), R); 00334 return NewNode; 00335 } 00336 00337 /// erase - Remove NumBytes from this node at the specified offset. We are 00338 /// guaranteed that there is a split at Offset. 00339 void RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) { 00340 // Since we are guaranteed that there is a split at Offset, we start by 00341 // finding the Piece that starts there. 00342 unsigned PieceOffs = 0; 00343 unsigned i = 0; 00344 for (; Offset > PieceOffs; ++i) 00345 PieceOffs += getPiece(i).size(); 00346 assert(PieceOffs == Offset && "Split didn't occur before erase!"); 00347 00348 unsigned StartPiece = i; 00349 00350 // Figure out how many pieces completely cover 'NumBytes'. We want to remove 00351 // all of them. 00352 for (; Offset+NumBytes > PieceOffs+getPiece(i).size(); ++i) 00353 PieceOffs += getPiece(i).size(); 00354 00355 // If we exactly include the last one, include it in the region to delete. 00356 if (Offset+NumBytes == PieceOffs+getPiece(i).size()) 00357 PieceOffs += getPiece(i).size(), ++i; 00358 00359 // If we completely cover some RopePieces, erase them now. 00360 if (i != StartPiece) { 00361 unsigned NumDeleted = i-StartPiece; 00362 for (; i != getNumPieces(); ++i) 00363 Pieces[i-NumDeleted] = Pieces[i]; 00364 00365 // Drop references to dead rope pieces. 00366 std::fill(&Pieces[getNumPieces()-NumDeleted], &Pieces[getNumPieces()], 00367 RopePiece()); 00368 NumPieces -= NumDeleted; 00369 00370 unsigned CoverBytes = PieceOffs-Offset; 00371 NumBytes -= CoverBytes; 00372 Size -= CoverBytes; 00373 } 00374 00375 // If we completely removed some stuff, we could be done. 00376 if (NumBytes == 0) return; 00377 00378 // Okay, now might be erasing part of some Piece. If this is the case, then 00379 // move the start point of the piece. 00380 assert(getPiece(StartPiece).size() > NumBytes); 00381 Pieces[StartPiece].StartOffs += NumBytes; 00382 00383 // The size of this node just shrunk by NumBytes. 00384 Size -= NumBytes; 00385 } 00386 00387 //===----------------------------------------------------------------------===// 00388 // RopePieceBTreeInterior Class 00389 //===----------------------------------------------------------------------===// 00390 00391 namespace { 00392 /// RopePieceBTreeInterior - This represents an interior node in the B+Tree, 00393 /// which holds up to 2*WidthFactor pointers to child nodes. 00394 class RopePieceBTreeInterior : public RopePieceBTreeNode { 00395 /// NumChildren - This holds the number of children currently active in the 00396 /// Children array. 00397 unsigned char NumChildren; 00398 RopePieceBTreeNode *Children[2*WidthFactor]; 00399 public: 00400 RopePieceBTreeInterior() : RopePieceBTreeNode(false), NumChildren(0) {} 00401 00402 RopePieceBTreeInterior(RopePieceBTreeNode *LHS, RopePieceBTreeNode *RHS) 00403 : RopePieceBTreeNode(false) { 00404 Children[0] = LHS; 00405 Children[1] = RHS; 00406 NumChildren = 2; 00407 Size = LHS->size() + RHS->size(); 00408 } 00409 00410 ~RopePieceBTreeInterior() { 00411 for (unsigned i = 0, e = getNumChildren(); i != e; ++i) 00412 Children[i]->Destroy(); 00413 } 00414 00415 bool isFull() const { return NumChildren == 2*WidthFactor; } 00416 00417 unsigned getNumChildren() const { return NumChildren; } 00418 const RopePieceBTreeNode *getChild(unsigned i) const { 00419 assert(i < NumChildren && "invalid child #"); 00420 return Children[i]; 00421 } 00422 RopePieceBTreeNode *getChild(unsigned i) { 00423 assert(i < NumChildren && "invalid child #"); 00424 return Children[i]; 00425 } 00426 00427 /// FullRecomputeSizeLocally - Recompute the Size field of this node by 00428 /// summing up the sizes of the child nodes. 00429 void FullRecomputeSizeLocally() { 00430 Size = 0; 00431 for (unsigned i = 0, e = getNumChildren(); i != e; ++i) 00432 Size += getChild(i)->size(); 00433 } 00434 00435 00436 /// split - Split the range containing the specified offset so that we are 00437 /// guaranteed that there is a place to do an insertion at the specified 00438 /// offset. The offset is relative, so "0" is the start of the node. 00439 /// 00440 /// If there is no space in this subtree for the extra piece, the extra tree 00441 /// node is returned and must be inserted into a parent. 00442 RopePieceBTreeNode *split(unsigned Offset); 00443 00444 00445 /// insert - Insert the specified ropepiece into this tree node at the 00446 /// specified offset. The offset is relative, so "0" is the start of the 00447 /// node. 00448 /// 00449 /// If there is no space in this subtree for the extra piece, the extra tree 00450 /// node is returned and must be inserted into a parent. 00451 RopePieceBTreeNode *insert(unsigned Offset, const RopePiece &R); 00452 00453 /// HandleChildPiece - A child propagated an insertion result up to us. 00454 /// Insert the new child, and/or propagate the result further up the tree. 00455 RopePieceBTreeNode *HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS); 00456 00457 /// erase - Remove NumBytes from this node at the specified offset. We are 00458 /// guaranteed that there is a split at Offset. 00459 void erase(unsigned Offset, unsigned NumBytes); 00460 00461 //static inline bool classof(const RopePieceBTreeInterior *) { return true; } 00462 static inline bool classof(const RopePieceBTreeNode *N) { 00463 return !N->isLeaf(); 00464 } 00465 }; 00466 } // end anonymous namespace 00467 00468 /// split - Split the range containing the specified offset so that we are 00469 /// guaranteed that there is a place to do an insertion at the specified 00470 /// offset. The offset is relative, so "0" is the start of the node. 00471 /// 00472 /// If there is no space in this subtree for the extra piece, the extra tree 00473 /// node is returned and must be inserted into a parent. 00474 RopePieceBTreeNode *RopePieceBTreeInterior::split(unsigned Offset) { 00475 // Figure out which child to split. 00476 if (Offset == 0 || Offset == size()) 00477 return 0; // If we have an exact offset, we're already split. 00478 00479 unsigned ChildOffset = 0; 00480 unsigned i = 0; 00481 for (; Offset >= ChildOffset+getChild(i)->size(); ++i) 00482 ChildOffset += getChild(i)->size(); 00483 00484 // If already split there, we're done. 00485 if (ChildOffset == Offset) 00486 return 0; 00487 00488 // Otherwise, recursively split the child. 00489 if (RopePieceBTreeNode *RHS = getChild(i)->split(Offset-ChildOffset)) 00490 return HandleChildPiece(i, RHS); 00491 return 0; // Done! 00492 } 00493 00494 /// insert - Insert the specified ropepiece into this tree node at the 00495 /// specified offset. The offset is relative, so "0" is the start of the 00496 /// node. 00497 /// 00498 /// If there is no space in this subtree for the extra piece, the extra tree 00499 /// node is returned and must be inserted into a parent. 00500 RopePieceBTreeNode *RopePieceBTreeInterior::insert(unsigned Offset, 00501 const RopePiece &R) { 00502 // Find the insertion point. We are guaranteed that there is a split at the 00503 // specified offset so find it. 00504 unsigned i = 0, e = getNumChildren(); 00505 00506 unsigned ChildOffs = 0; 00507 if (Offset == size()) { 00508 // Fastpath for a common case. Insert at end of last child. 00509 i = e-1; 00510 ChildOffs = size()-getChild(i)->size(); 00511 } else { 00512 for (; Offset > ChildOffs+getChild(i)->size(); ++i) 00513 ChildOffs += getChild(i)->size(); 00514 } 00515 00516 Size += R.size(); 00517 00518 // Insert at the end of this child. 00519 if (RopePieceBTreeNode *RHS = getChild(i)->insert(Offset-ChildOffs, R)) 00520 return HandleChildPiece(i, RHS); 00521 00522 return 0; 00523 } 00524 00525 /// HandleChildPiece - A child propagated an insertion result up to us. 00526 /// Insert the new child, and/or propagate the result further up the tree. 00527 RopePieceBTreeNode * 00528 RopePieceBTreeInterior::HandleChildPiece(unsigned i, RopePieceBTreeNode *RHS) { 00529 // Otherwise the child propagated a subtree up to us as a new child. See if 00530 // we have space for it here. 00531 if (!isFull()) { 00532 // Insert RHS after child 'i'. 00533 if (i + 1 != getNumChildren()) 00534 memmove(&Children[i+2], &Children[i+1], 00535 (getNumChildren()-i-1)*sizeof(Children[0])); 00536 Children[i+1] = RHS; 00537 ++NumChildren; 00538 return 0; 00539 } 00540 00541 // Okay, this node is full. Split it in half, moving WidthFactor children to 00542 // a newly allocated interior node. 00543 00544 // Create the new node. 00545 RopePieceBTreeInterior *NewNode = new RopePieceBTreeInterior(); 00546 00547 // Move over the last 'WidthFactor' values from here to NewNode. 00548 memcpy(&NewNode->Children[0], &Children[WidthFactor], 00549 WidthFactor*sizeof(Children[0])); 00550 00551 // Decrease the number of values in the two nodes. 00552 NewNode->NumChildren = NumChildren = WidthFactor; 00553 00554 // Finally, insert the two new children in the side the can (now) hold them. 00555 // These insertions can't fail. 00556 if (i < WidthFactor) 00557 this->HandleChildPiece(i, RHS); 00558 else 00559 NewNode->HandleChildPiece(i-WidthFactor, RHS); 00560 00561 // Recompute the two nodes' size. 00562 NewNode->FullRecomputeSizeLocally(); 00563 FullRecomputeSizeLocally(); 00564 return NewNode; 00565 } 00566 00567 /// erase - Remove NumBytes from this node at the specified offset. We are 00568 /// guaranteed that there is a split at Offset. 00569 void RopePieceBTreeInterior::erase(unsigned Offset, unsigned NumBytes) { 00570 // This will shrink this node by NumBytes. 00571 Size -= NumBytes; 00572 00573 // Find the first child that overlaps with Offset. 00574 unsigned i = 0; 00575 for (; Offset >= getChild(i)->size(); ++i) 00576 Offset -= getChild(i)->size(); 00577 00578 // Propagate the delete request into overlapping children, or completely 00579 // delete the children as appropriate. 00580 while (NumBytes) { 00581 RopePieceBTreeNode *CurChild = getChild(i); 00582 00583 // If we are deleting something contained entirely in the child, pass on the 00584 // request. 00585 if (Offset+NumBytes < CurChild->size()) { 00586 CurChild->erase(Offset, NumBytes); 00587 return; 00588 } 00589 00590 // If this deletion request starts somewhere in the middle of the child, it 00591 // must be deleting to the end of the child. 00592 if (Offset) { 00593 unsigned BytesFromChild = CurChild->size()-Offset; 00594 CurChild->erase(Offset, BytesFromChild); 00595 NumBytes -= BytesFromChild; 00596 // Start at the beginning of the next child. 00597 Offset = 0; 00598 ++i; 00599 continue; 00600 } 00601 00602 // If the deletion request completely covers the child, delete it and move 00603 // the rest down. 00604 NumBytes -= CurChild->size(); 00605 CurChild->Destroy(); 00606 --NumChildren; 00607 if (i != getNumChildren()) 00608 memmove(&Children[i], &Children[i+1], 00609 (getNumChildren()-i)*sizeof(Children[0])); 00610 } 00611 } 00612 00613 //===----------------------------------------------------------------------===// 00614 // RopePieceBTreeNode Implementation 00615 //===----------------------------------------------------------------------===// 00616 00617 void RopePieceBTreeNode::Destroy() { 00618 if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 00619 delete Leaf; 00620 else 00621 delete cast<RopePieceBTreeInterior>(this); 00622 } 00623 00624 /// split - Split the range containing the specified offset so that we are 00625 /// guaranteed that there is a place to do an insertion at the specified 00626 /// offset. The offset is relative, so "0" is the start of the node. 00627 /// 00628 /// If there is no space in this subtree for the extra piece, the extra tree 00629 /// node is returned and must be inserted into a parent. 00630 RopePieceBTreeNode *RopePieceBTreeNode::split(unsigned Offset) { 00631 assert(Offset <= size() && "Invalid offset to split!"); 00632 if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 00633 return Leaf->split(Offset); 00634 return cast<RopePieceBTreeInterior>(this)->split(Offset); 00635 } 00636 00637 /// insert - Insert the specified ropepiece into this tree node at the 00638 /// specified offset. The offset is relative, so "0" is the start of the 00639 /// node. 00640 /// 00641 /// If there is no space in this subtree for the extra piece, the extra tree 00642 /// node is returned and must be inserted into a parent. 00643 RopePieceBTreeNode *RopePieceBTreeNode::insert(unsigned Offset, 00644 const RopePiece &R) { 00645 assert(Offset <= size() && "Invalid offset to insert!"); 00646 if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 00647 return Leaf->insert(Offset, R); 00648 return cast<RopePieceBTreeInterior>(this)->insert(Offset, R); 00649 } 00650 00651 /// erase - Remove NumBytes from this node at the specified offset. We are 00652 /// guaranteed that there is a split at Offset. 00653 void RopePieceBTreeNode::erase(unsigned Offset, unsigned NumBytes) { 00654 assert(Offset+NumBytes <= size() && "Invalid offset to erase!"); 00655 if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(this)) 00656 return Leaf->erase(Offset, NumBytes); 00657 return cast<RopePieceBTreeInterior>(this)->erase(Offset, NumBytes); 00658 } 00659 00660 00661 //===----------------------------------------------------------------------===// 00662 // RopePieceBTreeIterator Implementation 00663 //===----------------------------------------------------------------------===// 00664 00665 static const RopePieceBTreeLeaf *getCN(const void *P) { 00666 return static_cast<const RopePieceBTreeLeaf*>(P); 00667 } 00668 00669 // begin iterator. 00670 RopePieceBTreeIterator::RopePieceBTreeIterator(const void *n) { 00671 const RopePieceBTreeNode *N = static_cast<const RopePieceBTreeNode*>(n); 00672 00673 // Walk down the left side of the tree until we get to a leaf. 00674 while (const RopePieceBTreeInterior *IN = dyn_cast<RopePieceBTreeInterior>(N)) 00675 N = IN->getChild(0); 00676 00677 // We must have at least one leaf. 00678 CurNode = cast<RopePieceBTreeLeaf>(N); 00679 00680 // If we found a leaf that happens to be empty, skip over it until we get 00681 // to something full. 00682 while (CurNode && getCN(CurNode)->getNumPieces() == 0) 00683 CurNode = getCN(CurNode)->getNextLeafInOrder(); 00684 00685 if (CurNode != 0) 00686 CurPiece = &getCN(CurNode)->getPiece(0); 00687 else // Empty tree, this is an end() iterator. 00688 CurPiece = 0; 00689 CurChar = 0; 00690 } 00691 00692 void RopePieceBTreeIterator::MoveToNextPiece() { 00693 if (CurPiece != &getCN(CurNode)->getPiece(getCN(CurNode)->getNumPieces()-1)) { 00694 CurChar = 0; 00695 ++CurPiece; 00696 return; 00697 } 00698 00699 // Find the next non-empty leaf node. 00700 do 00701 CurNode = getCN(CurNode)->getNextLeafInOrder(); 00702 while (CurNode && getCN(CurNode)->getNumPieces() == 0); 00703 00704 if (CurNode != 0) 00705 CurPiece = &getCN(CurNode)->getPiece(0); 00706 else // Hit end(). 00707 CurPiece = 0; 00708 CurChar = 0; 00709 } 00710 00711 //===----------------------------------------------------------------------===// 00712 // RopePieceBTree Implementation 00713 //===----------------------------------------------------------------------===// 00714 00715 static RopePieceBTreeNode *getRoot(void *P) { 00716 return static_cast<RopePieceBTreeNode*>(P); 00717 } 00718 00719 RopePieceBTree::RopePieceBTree() { 00720 Root = new RopePieceBTreeLeaf(); 00721 } 00722 RopePieceBTree::RopePieceBTree(const RopePieceBTree &RHS) { 00723 assert(RHS.empty() && "Can't copy non-empty tree yet"); 00724 Root = new RopePieceBTreeLeaf(); 00725 } 00726 RopePieceBTree::~RopePieceBTree() { 00727 getRoot(Root)->Destroy(); 00728 } 00729 00730 unsigned RopePieceBTree::size() const { 00731 return getRoot(Root)->size(); 00732 } 00733 00734 void RopePieceBTree::clear() { 00735 if (RopePieceBTreeLeaf *Leaf = dyn_cast<RopePieceBTreeLeaf>(getRoot(Root))) 00736 Leaf->clear(); 00737 else { 00738 getRoot(Root)->Destroy(); 00739 Root = new RopePieceBTreeLeaf(); 00740 } 00741 } 00742 00743 void RopePieceBTree::insert(unsigned Offset, const RopePiece &R) { 00744 // #1. Split at Offset. 00745 if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset)) 00746 Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 00747 00748 // #2. Do the insertion. 00749 if (RopePieceBTreeNode *RHS = getRoot(Root)->insert(Offset, R)) 00750 Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 00751 } 00752 00753 void RopePieceBTree::erase(unsigned Offset, unsigned NumBytes) { 00754 // #1. Split at Offset. 00755 if (RopePieceBTreeNode *RHS = getRoot(Root)->split(Offset)) 00756 Root = new RopePieceBTreeInterior(getRoot(Root), RHS); 00757 00758 // #2. Do the erasing. 00759 getRoot(Root)->erase(Offset, NumBytes); 00760 } 00761 00762 //===----------------------------------------------------------------------===// 00763 // RewriteRope Implementation 00764 //===----------------------------------------------------------------------===// 00765 00766 /// MakeRopeString - This copies the specified byte range into some instance of 00767 /// RopeRefCountString, and return a RopePiece that represents it. This uses 00768 /// the AllocBuffer object to aggregate requests for small strings into one 00769 /// allocation instead of doing tons of tiny allocations. 00770 RopePiece RewriteRope::MakeRopeString(const char *Start, const char *End) { 00771 unsigned Len = End-Start; 00772 assert(Len && "Zero length RopePiece is invalid!"); 00773 00774 // If we have space for this string in the current alloc buffer, use it. 00775 if (AllocOffs+Len <= AllocChunkSize) { 00776 memcpy(AllocBuffer->Data+AllocOffs, Start, Len); 00777 AllocOffs += Len; 00778 return RopePiece(AllocBuffer, AllocOffs-Len, AllocOffs); 00779 } 00780 00781 // If we don't have enough room because this specific allocation is huge, 00782 // just allocate a new rope piece for it alone. 00783 if (Len > AllocChunkSize) { 00784 unsigned Size = End-Start+sizeof(RopeRefCountString)-1; 00785 RopeRefCountString *Res = 00786 reinterpret_cast<RopeRefCountString *>(new char[Size]); 00787 Res->RefCount = 0; 00788 memcpy(Res->Data, Start, End-Start); 00789 return RopePiece(Res, 0, End-Start); 00790 } 00791 00792 // Otherwise, this was a small request but we just don't have space for it 00793 // Make a new chunk and share it with later allocations. 00794 00795 // If we had an old allocation, drop our reference to it. 00796 if (AllocBuffer && --AllocBuffer->RefCount == 0) 00797 delete [] (char*)AllocBuffer; 00798 00799 unsigned AllocSize = offsetof(RopeRefCountString, Data) + AllocChunkSize; 00800 AllocBuffer = reinterpret_cast<RopeRefCountString *>(new char[AllocSize]); 00801 AllocBuffer->RefCount = 0; 00802 memcpy(AllocBuffer->Data, Start, Len); 00803 AllocOffs = Len; 00804 00805 // Start out the new allocation with a refcount of 1, since we have an 00806 // internal reference to it. 00807 AllocBuffer->addRef(); 00808 return RopePiece(AllocBuffer, 0, Len); 00809 } 00810 00811