1// persist.cpp -- 2// $Id: persist.cpp 1230 2007-03-09 15:58:53Z jcw $ 3// This is part of Metakit, the homepage is http://www.equi4.com/metakit.html 4 5/** @file 6 * Implementation of the main file management classes 7 */ 8 9#include "header.h" 10#include "column.h" 11#include "persist.h" 12#include "handler.h" 13#include "store.h" 14#include "field.h" 15 16///////////////////////////////////////////////////////////////////////////// 17 18class c4_FileMark { 19 enum { 20 kStorageFormat = 0x4C4A, // b0 = 'J', b1 = <4C> (on Intel) 21 kReverseFormat = 0x4A4C // b0 = <4C>, b1 = 'J' 22 }; 23 24 t4_byte _data[8]; 25 26 public: 27 c4_FileMark(); 28 c4_FileMark(t4_i32 pos_, bool flipped_, bool extend_); 29 c4_FileMark(t4_i32 pos_, int len_); 30 31 t4_i32 Offset()const; 32 t4_i32 OldOffset()const; 33 34 bool IsHeader()const; 35 bool IsOldHeader()const; 36 bool IsFlipped()const; 37}; 38 39///////////////////////////////////////////////////////////////////////////// 40 41c4_FileMark::c4_FileMark() { 42 d4_assert(sizeof *this == 8); 43} 44 45c4_FileMark::c4_FileMark(t4_i32 pos_, bool flipped_, bool extend_) { 46 d4_assert(sizeof *this == 8); 47 *(short*)_data = flipped_ ? kReverseFormat : kStorageFormat; 48 _data[2] = extend_ ? 0x0A : 0x1A; 49 _data[3] = 0; 50 t4_byte *p = _data + 4; 51 for (int i = 24; i >= 0; i -= 8) 52 *p++ = (t4_byte)(pos_ >> i); 53 d4_assert(p == _data + sizeof _data); 54} 55 56c4_FileMark::c4_FileMark(t4_i32 pos_, int len_) { 57 d4_assert(sizeof *this == 8); 58 t4_byte *p = _data; 59 *p++ = 0x80; 60 for (int j = 16; j >= 0; j -= 8) 61 *p++ = (t4_byte)(len_ >> j); 62 for (int i = 24; i >= 0; i -= 8) 63 *p++ = (t4_byte)(pos_ >> i); 64 d4_assert(p == _data + sizeof _data); 65} 66 67t4_i32 c4_FileMark::Offset()const { 68 t4_i32 v = 0; 69 for (int i = 4; i < 8; ++i) 70 v = (v << 8) + _data[i]; 71 return v; 72} 73 74t4_i32 c4_FileMark::OldOffset()const { 75 t4_i32 v = 0; 76 for (int i = 8; --i >= 4;) 77 v = (v << 8) + _data[i]; 78 return v; 79} 80 81bool c4_FileMark::IsHeader()const { 82 return (_data[0] == 'J' || _data[0] == 'L') && (_data[0] ^ _data[1]) == ('J' 83 ^ 'L') && _data[2] == 0x1A; 84} 85 86bool c4_FileMark::IsOldHeader()const { 87 return IsHeader() && _data[3] == 0x80; 88} 89 90bool c4_FileMark::IsFlipped()const { 91 return *(short*)_data == kReverseFormat; 92 93} 94 95///////////////////////////////////////////////////////////////////////////// 96 97class c4_Allocator: public c4_DWordArray { 98 public: 99 c4_Allocator(); 100 101 void Initialize(t4_i32 first_ = 1); 102 103 t4_i32 AllocationLimit()const; 104 105 t4_i32 Allocate(t4_i32 len_); 106 void Occupy(t4_i32 pos_, t4_i32 len_); 107 void Release(t4_i32 pos_, t4_i32 len_); 108 void Dump(const char *str_); 109 t4_i32 FreeCounts(t4_i32 *bytes_ = 0); 110 111 private: 112 int Locate(t4_i32 pos_)const; 113 void InsertPair(int i_, t4_i32 from_, t4_i32 to_); 114 t4_i32 ReduceFrags(int goal_, int sHi_, int sLo_); 115}; 116 117///////////////////////////////////////////////////////////////////////////// 118// 119// Allocation of blocks is maintained in a separate data structure. 120// There is no allocation overhead in the allocation arena itself. 121// 122// A single vector of "walls" is maintained, sorted by position: 123// 124// * Each transition between free and allocated is a single entry. 125// The number of entries is <num-free-ranges> + <num-used-ranges>. 126// * By definition, free areas start at the positions indicated 127// by the entries on even indices. Allocated ones use odd entries. 128// * There is an extra <0,0> free slot at the very beginning. This 129// simplifies boundary conditions at the start of the arena. 130// * Position zero cannot be allocated, first slot starts at 1. 131// 132// Properties of this approach: 133// 134// * No allocation overhead for adjacent allocated areas. On the 135// other hand, the allocator does not know the size of used slots. 136// * Alternate function allows marking a specific range as occupied. 137// * Allocator can be initialized as either all free or all in-use. 138// * Allocation info contains only integers, it could be stored. 139// * To extend allocated slots: "occupy" extra bytes at the end. 140// * Generic: can be used for memory, disk files, and array entries. 141 142c4_Allocator::c4_Allocator() { 143 Initialize(); 144} 145 146void c4_Allocator::Initialize(t4_i32 first_) { 147 SetSize(0, 1000); // empty, and growing in large chunks 148 Add(0); // fake block at start 149 Add(0); // ... only used to avoid merging 150 151 // if occupied, add a tiny free slot at the end, else add entire range 152 const t4_i32 kMaxInt = 0x7fffffff; 153 if (first_ == 0) 154 first_ = kMaxInt; 155 156 Add(first_); // start at a nicely aligned position 157 Add(kMaxInt); // ... there is no limit on file size 158} 159 160t4_i32 c4_Allocator::Allocate(t4_i32 len_) { 161 // zero arg is ok, it simply returns first allocatable position 162 for (int i = 2; i < GetSize(); i += 2) 163 if (GetAt(i + 1) >= GetAt(i) + len_) { 164 t4_i32 pos = GetAt(i); 165 if ((t4_i32)GetAt(i + 1) > pos + len_) 166 ElementAt(i) += len_; 167 else 168 RemoveAt(i, 2); 169 return pos; 170 } 171 172 d4_assert(0); 173 return 0; // not reached 174} 175 176void c4_Allocator::Occupy(t4_i32 pos_, t4_i32 len_) { 177 d4_assert(pos_ > 0); 178 // note that zero size simply checks if there is any space to extend 179 180 int i = Locate(pos_); 181 d4_assert(0 < i && i < GetSize()); 182 183 if (i % 2) { 184 // allocation is not at start of free block 185 d4_assert((t4_i32)GetAt(i - 1) < pos_); 186 187 if ((t4_i32)GetAt(i) == pos_ + len_) 188 // allocate from end of free block 189 SetAt(i, pos_); 190 else 191 // split free block in two 192 InsertPair(i, pos_, pos_ + len_); 193 } else if ((t4_i32)GetAt(i) == pos_) 194 /* 195 This side of the if used to be unconditional, but that was 196 incorrect if ReduceFrags gets called (which only happens with 197 severely fragmented files) - there are cases when allocation 198 leads to an occupy request of which the free space list knows 199 nothing about because it dropped small segments. The solution 200 is to silently "allow" such allocations - fixed 29-02-2000 201 Thanks to Andrew Kuchling for his help in chasing this bug. 202 */ 203 { 204 // else extend tail of allocated area 205 if ((t4_i32)GetAt(i + 1) > pos_ + len_) 206 ElementAt(i) += len_; 207 // move start of next free up 208 else 209 RemoveAt(i, 2); 210 // remove this slot 211 } 212} 213 214void c4_Allocator::Release(t4_i32 pos, t4_i32 len) { 215 int i = Locate(pos + len); 216 d4_assert(0 < i && i < GetSize()); 217 d4_assert(i % 2 == 0); // don't release inside a free block 218 219 if ((t4_i32)GetAt(i) == pos) 220 // move start of next free down 221 ElementAt(i) -= len; 222 else if ((t4_i32)GetAt(i - 1) == pos) 223 // move end of previous free up 224 ElementAt(i - 1) += len; 225 else 226 // insert a new entry 227 InsertPair(i, pos, pos + len); 228 229 if (GetAt(i - 1) == GetAt(i)) 230 // merge if adjacent free 231 RemoveAt(i - 1, 2); 232} 233 234t4_i32 c4_Allocator::AllocationLimit()const { 235 d4_assert(GetSize() >= 2); 236 237 return GetAt(GetSize() - 2); 238} 239 240int c4_Allocator::Locate(t4_i32 pos)const { 241 int lo = 0, hi = GetSize() - 1; 242 243 while (lo < hi) { 244 int i = (lo + hi) / 2; 245 if (pos < (t4_i32)GetAt(i)) 246 hi = i - 1; 247 else if (pos > (t4_i32)GetAt(i)) 248 lo = i + 1; 249 else 250 return i; 251 } 252 253 return lo < GetSize() && pos > (t4_i32)GetAt(lo) ? lo + 1: lo; 254} 255 256void c4_Allocator::InsertPair(int i_, t4_i32 from_, t4_i32 to_) { 257 d4_assert(0 < i_); 258 d4_assert(i_ < GetSize()); 259 260 d4_assert(from_ < to_); 261 d4_assert((t4_i32)GetAt(i_ - 1) < from_); 262 //!d4_assert(to_ < GetAt(i_)); 263 264 if (to_ >= (t4_i32)GetAt(i_)) 265 return ; 266 // ignore 2nd allocation of used area 267 268 InsertAt(i_, from_, 2); 269 SetAt(i_ + 1, to_); 270 271 // it's ok to have arrays up to some 30000 bytes 272 if (GetSize() > 7500) 273 ReduceFrags(5000, 12, 6); 274} 275 276t4_i32 c4_Allocator::ReduceFrags(int goal_, int sHi_, int sLo_) { 277 // drastic fail-safe measure: remove small gaps if vec gets too long 278 // this will cause some lost free space but avoids array overflow 279 // the lost space will most probably be re-used after the next commit 280 281 int limit = GetSize() - 2; 282 t4_i32 loss = 0; 283 284 // go through all entries and remove gaps under the given threshold 285 for (int shift = sHi_; shift >= sLo_; --shift) { 286 // the threshold is a fraction of the current size of the arena 287 t4_i32 threshold = AllocationLimit() >> shift; 288 if (threshold == 0) 289 continue; 290 291 int n = 2; 292 for (int i = n; i < limit; i += 2) 293 if ((t4_i32)GetAt(i + 1) - (t4_i32)GetAt(i) > threshold) { 294 SetAt(n++, GetAt(i)); 295 SetAt(n++, GetAt(i + 1)); 296 } else 297 loss += GetAt(i + 1) - GetAt(i); 298 299 limit = n; 300 301 // if (GetSize() < goal_) - suboptimal, fixed 29-02-2000 302 if (limit < goal_) 303 break; 304 // got rid of enough entries, that's enough 305 } 306 307 int n = GetSize() - 2; 308 SetAt(limit++, GetAt(n++)); 309 SetAt(limit++, GetAt(n)); 310 SetSize(limit); 311 312 return loss; 313} 314 315#if q4_CHECK 316#include <stdio.h> 317 318void c4_Allocator::Dump(const char *str_) { 319 fprintf(stderr, "c4_Allocator::Dump, %d entries <%s>\n", GetSize(), str_); 320 for (int i = 2; i < GetSize(); i += 2) 321 fprintf(stderr, " %10ld .. %ld\n", GetAt(i - 1), GetAt(i)); 322 fprintf(stderr, "END\n"); 323} 324 325#else 326 327void c4_Allocator::Dump(const char *str_){} 328 329#endif 330 331t4_i32 c4_Allocator::FreeCounts(t4_i32 *bytes_) { 332 if (bytes_ != 0) { 333 t4_i32 total = 0; 334 for (int i = 2; i < GetSize() - 2; i += 2) 335 total += GetAt(i + 1) - GetAt(i); 336 *bytes_ = total; 337 } 338 return GetSize() / 2-2; 339} 340 341///////////////////////////////////////////////////////////////////////////// 342 343class c4_Differ { 344 public: 345 c4_Differ(c4_Storage &storage_); 346 ~c4_Differ(); 347 348 int NewDiffID(); 349 void CreateDiff(int id_, c4_Column &col_); 350 t4_i32 BaseOfDiff(int id_); 351 void ApplyDiff(int id_, c4_Column &col_)const; 352 353 void GetRoot(c4_Bytes &buffer_); 354 355 c4_Storage _storage; 356 c4_View _diffs; 357 c4_View _temp; 358 359 private: 360 void AddEntry(t4_i32, t4_i32, const c4_Bytes &); 361 362 c4_ViewProp pCols; // column info: 363 c4_IntProp pOrig; // original position 364 c4_ViewProp pDiff; // difference chunks: 365 c4_IntProp pKeep; // offset 366 c4_IntProp pResize; // length 367 c4_BytesProp pBytes; // data 368}; 369 370c4_Differ::c4_Differ(c4_Storage &storage_): _storage(storage_), pCols("_C"), 371 pOrig("_O"), pDiff("_D"), pKeep("_K"), pResize("_R"), pBytes("_B") { 372 // weird names, to avoid clashing with existing ones (capitalization!) 373 _diffs = _storage.GetAs("_C[_O:I,_D[_K:I,_R:I,_B:B]]"); 374} 375 376c4_Differ::~c4_Differ() { 377 _diffs = c4_View(); 378} 379 380void c4_Differ::AddEntry(t4_i32 off_, t4_i32 len_, const c4_Bytes &data_) { 381 int n = _temp.GetSize(); 382 _temp.SetSize(n + 1); 383 c4_RowRef r = _temp[n]; 384 385 pKeep(r) = (t4_i32)off_; 386 pResize(r) = (t4_i32)len_; 387 pBytes(r).SetData(data_); 388} 389 390int c4_Differ::NewDiffID() { 391 int n = _diffs.GetSize(); 392 _diffs.SetSize(n + 1); 393 return n; 394} 395 396void c4_Differ::CreateDiff(int id_, c4_Column &col_) { 397 _temp.SetSize(0); 398#if 0 399 t4_i32 offset = 0; 400 t4_i32 savedOff = 0; 401 t4_i32 savedLen = 0; 402 403 c4_Strategy *strat = col_.Persist() != 0 ? &col_.Strategy(): 0; 404 405 c4_ColIter iter(col_, 0, col_.ColSize()); 406 while (iter.Next()) { 407 const t4_byte *p = iter.BufLoad(); 408 if (strat != 0 && strat->_mapStart != 0 && p >= strat->_mapStart && p - 409 strat->_mapStart < strat->_dataSize) { 410 t4_i32 nextOff = p - strat->_mapStart; 411 if (savedLen == 0) 412 savedOff = nextOff; 413 if (nextOff == savedOff + savedLen) { 414 savedLen += iter.BufLen(); 415 continue; 416 } 417 418 if (savedLen > 0) 419 AddEntry(savedOff, savedLen, c4_Bytes()); 420 421 savedOff = nextOff; 422 savedLen = iter.BufLen(); 423 } else { 424 AddEntry(savedOff, savedLen, c4_Bytes(p, iter.BufLen())); 425 savedLen = 0; 426 } 427 428 offset += iter.BufLen(); 429 } 430 431 c4_View diff = pDiff(_diffs[id_]); 432 if (_temp.GetSize() != diff.GetSize() || _temp != diff) 433#else 434 c4_Bytes t1; 435 const t4_byte *p = col_.FetchBytes(0, col_.ColSize(), t1, false); 436 AddEntry(0, 0, c4_Bytes(p, col_.ColSize())); 437#endif 438 pDiff(_diffs[id_]) = _temp; 439 440 pOrig(_diffs[id_]) = col_.Position(); 441} 442 443t4_i32 c4_Differ::BaseOfDiff(int id_) { 444 d4_assert(0 <= id_ && id_ < _diffs.GetSize()); 445 446 return pOrig(_diffs[id_]); 447} 448 449void c4_Differ::ApplyDiff(int id_, c4_Column &col_)const { 450 d4_assert(0 <= id_ && id_ < _diffs.GetSize()); 451 452 c4_View diff = pDiff(_diffs[id_]); 453 t4_i32 offset = 0; 454 455 for (int n = 0; n < diff.GetSize(); ++n) { 456 c4_RowRef row(diff[n]); 457 offset += pKeep(row); 458 459 c4_Bytes data; 460 pBytes(row).GetData(data); 461 462 // the following code is a lot like c4_MemoRef::Modify 463 const t4_i32 change = pResize(row); 464 if (change < 0) 465 col_.Shrink(offset, - change); 466 else if (change > 0) 467 col_.Grow(offset, change); 468 469 col_.StoreBytes(offset, data); 470 offset += data.Size(); 471 } 472 473 if (offset > col_.ColSize()) 474 col_.Shrink(offset, offset - col_.ColSize()); 475} 476 477void c4_Differ::GetRoot(c4_Bytes &buffer_) { 478 int last = _diffs.GetSize() - 1; 479 if (last >= 0) { 480 c4_Bytes temp; 481 c4_View diff = pDiff(_diffs[last]); 482 if (diff.GetSize() > 0) 483 pBytes(diff[0]).GetData(buffer_); 484 } 485} 486 487///////////////////////////////////////////////////////////////////////////// 488 489c4_SaveContext::c4_SaveContext(c4_Strategy &strategy_, bool fullScan_, int 490 mode_, c4_Differ *differ_, c4_Allocator *space_): _strategy(strategy_), _walk 491 (0), _differ(differ_), _space(space_), _cleanup(0), _nextSpace(0), _preflight 492 (true), _fullScan(fullScan_), _mode(mode_), _nextPosIndex(0), _bufPtr(_buffer) 493 , _curr(_buffer), _limit(_buffer) { 494 if (_space == 0) 495 _space = _cleanup = d4_new c4_Allocator; 496 497 _nextSpace = _mode == 1 ? d4_new c4_Allocator: _space; 498} 499 500c4_SaveContext::~c4_SaveContext() { 501 delete _cleanup; 502 if (_nextSpace != _space) 503 delete _nextSpace; 504} 505 506bool c4_SaveContext::IsFlipped()const { 507 return _strategy._bytesFlipped; 508} 509 510bool c4_SaveContext::Serializing()const { 511 return _fullScan; 512} 513 514void c4_SaveContext::AllocDump(const char *str_, bool next_) { 515 c4_Allocator *ap = next_ ? _nextSpace : _space; 516 if (ap != 0) 517 ap->Dump(str_); 518} 519 520void c4_SaveContext::FlushBuffer() { 521 int n = _curr - _bufPtr; 522 if (_walk != 0 && n > 0) { 523 t4_i32 end = _walk->ColSize(); 524 _walk->Grow(end, n); 525 _walk->StoreBytes(end, c4_Bytes(_bufPtr, n)); 526 } 527 528 _curr = _bufPtr = _buffer; 529 _limit = _buffer + sizeof _buffer; 530} 531 532c4_Column *c4_SaveContext::SetWalkBuffer(c4_Column *col_) { 533 FlushBuffer(); 534 535 c4_Column *prev = _walk; 536 _walk = col_; 537 return prev; 538} 539 540void c4_SaveContext::Write(const void *buf_, int len_) { 541 // use buffering if possible 542 if (_curr + len_ <= _limit) { 543 memcpy(_curr, buf_, len_); 544 _curr += len_; 545 } else { 546 FlushBuffer(); 547 _bufPtr = (t4_byte*)buf_; // also loses const 548 _curr = _limit = _bufPtr + len_; 549 FlushBuffer(); 550 } 551} 552 553void c4_SaveContext::StoreValue(t4_i32 v_) { 554 if (_walk == 0) 555 return ; 556 557 if (_curr + 10 >= _limit) 558 FlushBuffer(); 559 560 d4_assert(_curr + 10 < _limit); 561 c4_Column::PushValue(_curr, v_); 562} 563 564void c4_SaveContext::SaveIt(c4_HandlerSeq &root_, c4_Allocator **spacePtr_, 565 c4_Bytes &rootWalk_) { 566 d4_assert(_space != 0); 567 568 const t4_i32 size = _strategy.FileSize(); 569 if (_strategy._failure != 0) 570 return ; 571 572 const t4_i32 end = _fullScan ? 0 : size - _strategy._baseOffset; 573 574 if (_differ == 0) { 575 if (_mode != 1) 576 _space->Initialize(); 577 578 // don't allocate anything inside the file in extend mode 579 if (_mode == 2 && end > 0) { 580 _space->Occupy(1, end - 1); 581 _nextSpace->Occupy(1, end - 1); 582 } 583 584 // the header is always reserved 585 _space->Occupy(1, 7); 586 _nextSpace->Occupy(1, 7); 587 588 if (end > 0) { 589 d4_assert(end >= 16); 590 _space->Occupy(end - 16, 16); 591 _nextSpace->Occupy(end - 16, 16); 592 _space->Occupy(end, 8); 593 _nextSpace->Occupy(end, 8); 594 } 595 } 596 597 //AllocDump("a1", false); 598 //AllocDump("a2", true); 599 600 // first pass allocates columns and constructs shallow walks 601 c4_Column walk(root_.Persist()); 602 SetWalkBuffer(&walk); 603 CommitSequence(root_, true); 604 SetWalkBuffer(0); 605 CommitColumn(walk); 606 607 c4_Bytes tempWalk; 608 walk.FetchBytes(0, walk.ColSize(), tempWalk, true); 609 610 t4_i32 limit = _nextSpace->AllocationLimit(); 611 d4_assert(limit >= 8 || _differ != 0); 612 613 if (limit < 0) { 614 // 2006-01-12 #2: catch file size exceeding 2 Gb 615 _strategy._failure = - 1; // unusual non-zero value flags this case 616 return ; 617 } 618 619 bool changed = _fullScan || tempWalk != rootWalk_; 620 621 rootWalk_ = c4_Bytes(tempWalk.Contents(), tempWalk.Size(), true); 622 623 _preflight = false; 624 625 // special-case to avoid saving data if file is logically empty 626 // in that case, the data is 0x80 0x81 0x80 (plus the header) 627 if (!_fullScan && limit <= 11 && _differ == 0) { 628 _space->Initialize(); 629 _nextSpace->Initialize(); 630 changed = false; 631 } 632 633 if (!changed) 634 return ; 635 636 //AllocDump("b1", false); 637 //AllocDump("b2", true); 638 639 if (_differ != 0) { 640 int n = _differ->NewDiffID(); 641 _differ->CreateDiff(n, walk); 642 return ; 643 } 644 645 d4_assert(_mode != 0 || _fullScan); 646 647 // this is the place where writing may start 648 649 // figure out where the new file ends and write a skip tail there 650 t4_i32 end0 = end; 651 652 // true if the file need not be extended due to internal free space 653 bool inPlace = end0 == limit - 8; 654 if (inPlace) { 655 d4_assert(!_fullScan); 656 _space->Release(end0, 8); 657 _nextSpace->Release(end0, 8); 658 end0 -= 16; // overwrite existing tail markers 659 } else { 660 /* 18-11-2005 write new end marker and flush it before *anything* else! */ 661 if (!_fullScan && end0 < limit) { 662 c4_FileMark mark1(limit, 0); 663 _strategy.DataWrite(limit, &mark1, sizeof mark1); 664 _strategy.DataCommit(0); 665 if (_strategy._failure != 0) 666 return ; 667 } 668 669 c4_FileMark head(limit + 16-end, _strategy._bytesFlipped, end > 0); 670 _strategy.DataWrite(end, &head, sizeof head); 671 672 if (end0 < limit) 673 end0 = limit; 674 // create a gap 675 } 676 677 t4_i32 end1 = end0 + 8; 678 t4_i32 end2 = end1 + 8; 679 680 if (!_fullScan && !inPlace) { 681 c4_FileMark mark1(end0, 0); 682 _strategy.DataWrite(end0, &mark1, sizeof mark1); 683#if q4_WIN32 684 /* March 8, 2002 685 * On at least NT4 with NTFS, extending a file can cause it to be 686 * rounded up further than expected. To prevent creating a bad 687 * file (since the file does then not end with a marker), the 688 * workaround it so simply accept the new end instead and rewrite. 689 * Note that between these two writes, the file is in a bad state. 690 */ 691 t4_i32 realend = _strategy.FileSize() - _strategy._baseOffset; 692 if (realend > end1) { 693 end0 = limit = realend - 8; 694 end1 = realend; 695 end2 = realend + 8; 696 c4_FileMark mark1a(end0, 0); 697 _strategy.DataWrite(end0, &mark1a, sizeof mark1a); 698 } 699#endif 700 d4_assert(_strategy.FileSize() == _strategy._baseOffset + end1); 701 } 702 703 _space->Occupy(end0, 16); 704 _nextSpace->Occupy(end0, 16); 705 706 // strategy.DataCommit(0); // may be needed, need more info on how FS's work 707 // but this would need more work, since we can't adjust file-mapping here 708 709 // second pass saves the columns and structure to disk 710 CommitSequence(root_, true); // writes changed columns 711 CommitColumn(walk); 712 713 //! d4_assert(_curr == 0); 714 d4_assert(_nextPosIndex == _newPositions.GetSize()); 715 716 if (_fullScan) { 717 c4_FileMark mark1(limit, 0); 718 _strategy.DataWrite(_strategy.FileSize() - _strategy._baseOffset, &mark1, 719 sizeof mark1); 720 721 c4_FileMark mark2(limit - walk.ColSize(), walk.ColSize()); 722 _strategy.DataWrite(_strategy.FileSize() - _strategy._baseOffset, &mark2, 723 sizeof mark2); 724 725 return ; 726 } 727 728 if (inPlace) 729 d4_assert(_strategy.FileSize() == _strategy._baseOffset + end2); 730 else { 731 // make sure the allocated size hasn't changed 732 d4_assert(_nextSpace->AllocationLimit() == limit + 16); 733 d4_assert(end0 >= limit); 734 d4_assert(_strategy.FileSize() - _strategy._baseOffset == end1); 735 } 736 737 if (walk.Position() == 0 || _strategy._failure != 0) 738 return ; 739 740 _strategy.DataCommit(0); 741 742 c4_FileMark mark2(walk.Position(), walk.ColSize()); 743 _strategy.DataWrite(end1, &mark2, sizeof mark2); 744 d4_assert(_strategy.FileSize() - _strategy._baseOffset == end2); 745 746 // do not alter the file header in extend mode, unless it is new 747 if (!_fullScan && (_mode == 1 || end == 0)) { 748 _strategy.DataCommit(0); 749 750 c4_FileMark head(end2, _strategy._bytesFlipped, false); 751 d4_assert(head.IsHeader()); 752 _strategy.DataWrite(0, &head, sizeof head); 753 754 // if the file became smaller, we could shrink it 755 if (limit + 16 < end0) { 756 /* 757 Not yet, this depends on the strategy class being able to truncate, but 758 there is no way to find out whether it does (the solution is to write tail 759 markers in such a way that the file won't grow unnecessarily if it doesn't). 760 761 The logic will probably be: 762 763 * write new skip + commit "tails" at limit (no visible effect on file) 764 * overwrite commit tail at end with a skip to this new one (equivalent) 765 * replace header with one pointing to that internal new one (equivalent) 766 * flush (now the file is valid both truncated and not-yet-truncated 767 768 end = limit; 769 */ 770 } 771 } 772 773 // if using memory mapped files, make sure the map is no longer in use 774 if (_strategy._mapStart != 0) 775 root_.UnmappedAll(); 776 777 // commit and tell strategy object what the new file size is, this 778 // may be smaller now, if old data at the end is no longer referenced 779 _strategy.DataCommit(end2); 780 781 d4_assert(_strategy.FileSize() - _strategy._baseOffset == end2); 782 783 if (spacePtr_ != 0 && _space != _nextSpace) { 784 d4_assert(*spacePtr_ == _space); 785 delete *spacePtr_; 786 *spacePtr_ = _nextSpace; 787 _nextSpace = 0; 788 } 789} 790 791bool c4_SaveContext::CommitColumn(c4_Column &col_) { 792 bool changed = col_.IsDirty() || _fullScan; 793 794 t4_i32 sz = col_.ColSize(); 795 StoreValue(sz); 796 if (sz > 0) { 797 t4_i32 pos = col_.Position(); 798 799 if (_differ) { 800 if (changed) { 801 int n = pos < 0 ? ~pos: _differ->NewDiffID(); 802 _differ->CreateDiff(n, col_); 803 804 d4_assert(n >= 0); 805 pos = ~n; 806 } 807 } else if (_preflight) { 808 if (changed) 809 pos = _space->Allocate(sz); 810 811 _nextSpace->Occupy(pos, sz); 812 _newPositions.Add(pos); 813 } else { 814 pos = _newPositions.GetAt(_nextPosIndex++); 815 816 if (changed) 817 col_.SaveNow(_strategy, pos); 818 819 if (!_fullScan) 820 col_.SetLocation(pos, sz); 821 } 822 823 StoreValue(pos); 824 } 825 826 return changed; 827} 828 829void c4_SaveContext::CommitSequence(c4_HandlerSeq &seq_, bool selfDesc_) { 830 StoreValue(0); // sias prefix 831 832 if (selfDesc_) { 833 c4_String desc = seq_.Description(); 834 int k = desc.GetLength(); 835 StoreValue(k); 836 Write((const char*)desc, k); 837 } 838 839 StoreValue(seq_.NumRows()); 840 if (seq_.NumRows() > 0) 841 for (int i = 0; i < seq_.NumFields(); ++i) 842 seq_.NthHandler(i).Commit(*this); 843} 844 845///////////////////////////////////////////////////////////////////////////// 846 847// used for on-the-fly conversion of old-format datafiles 848t4_byte *_oldBuf; 849const t4_byte *_oldCurr; 850const t4_byte *_oldLimit; 851t4_i32 _oldSeek; 852 853 854c4_Persist::c4_Persist(c4_Strategy &strategy_, bool owned_, int mode_): _space 855 (0), _strategy(strategy_), _root(0), _differ(0), _fCommit(0), _mode(mode_), 856 _owned(owned_), _oldBuf(0), _oldCurr(0), _oldLimit(0), _oldSeek( - 1) { 857 if (_mode == 1) 858 _space = d4_new c4_Allocator; 859} 860 861c4_Persist::~c4_Persist() { 862 delete _differ; 863 864 if (_owned) { 865 if (_root != 0) 866 _root->UnmappedAll(); 867 delete &_strategy; 868 } 869 870 delete _space; 871 872 if (_oldBuf != 0) 873 delete [] _oldBuf; 874} 875 876c4_HandlerSeq &c4_Persist::Root()const { 877 d4_assert(_root != 0); 878 return *_root; 879} 880 881void c4_Persist::SetRoot(c4_HandlerSeq *root_) { 882 d4_assert(_root == 0); 883 _root = root_; 884} 885 886c4_Strategy &c4_Persist::Strategy()const { 887 return _strategy; 888} 889 890bool c4_Persist::AutoCommit(bool flag_) { 891 bool prev = _fCommit != 0; 892 if (flag_) 893 _fCommit = &c4_Persist::Commit; 894 else 895 _fCommit = 0; 896 return prev; 897} 898 899void c4_Persist::DoAutoCommit() { 900 if (_fCommit != 0) 901 (this->*_fCommit)(false); 902} 903 904bool c4_Persist::SetAside(c4_Storage &aside_) { 905 delete _differ; 906 _differ = d4_new c4_Differ(aside_); 907 Rollback(false); 908 return true; //! true if the generation matches 909} 910 911c4_Storage *c4_Persist::GetAside()const { 912 return _differ != 0 ? &_differ->_storage: 0; 913} 914 915bool c4_Persist::Commit(bool full_) { 916 // 1-Mar-1999, new semantics! return success status of commits 917 _strategy._failure = 0; 918 919 if (!_strategy.IsValid()) 920 return false; 921 922 if (_mode == 0 && (_differ == 0 || full_)) 923 // can't commit to r/o file 924 return false; 925 // note that _strategy._failure is *zero* in this case 926 927 c4_SaveContext ar(_strategy, false, _mode, full_ ? 0 : _differ, _space); 928 929 // get rid of temp properties which still use the datafile 930 if (_mode == 1) 931 _root->DetachFromStorage(false); 932 933 // 30-3-2001: moved down, fixes "crash every 2nd call of mkdemo/dbg" 934 ar.SaveIt(*_root, &_space, _rootWalk); 935 return _strategy._failure == 0; 936} 937 938bool c4_Persist::Rollback(bool full_) { 939 _root->DetachFromParent(); 940 _root->DetachFromStorage(true); 941 _root = 0; 942 943 if (_space != 0) 944 _space->Initialize(); 945 946 c4_HandlerSeq *seq = d4_new c4_HandlerSeq(this); 947 seq->DefineRoot(); 948 SetRoot(seq); 949 950 if (full_) { 951 delete _differ; 952 _differ = 0; 953 } 954 955 LoadAll(); 956 957 return _strategy._failure == 0; 958} 959 960bool c4_Persist::LoadIt(c4_Column &walk_) { 961 t4_i32 limit = _strategy.FileSize(); 962 if (_strategy._failure != 0) 963 return false; 964 965 if (_strategy.EndOfData(limit) < 0) { 966 _strategy.SetBase(limit); 967 d4_assert(_strategy._failure == 0); // file is ok, but empty 968 return false; 969 } 970 971 if (_strategy._rootLen > 0) 972 walk_.SetLocation(_strategy._rootPos, _strategy._rootLen); 973 974 // if the file size has increased, we must remap 975 if (_strategy._mapStart != 0 && _strategy.FileSize() > _strategy._baseOffset 976 + _strategy._dataSize) 977 _strategy.ResetFileMapping(); 978 979 return true; 980} 981 982void c4_Persist::LoadAll() { 983 c4_Column walk(this); 984 if (!LoadIt(walk)) 985 return ; 986 987 if (_strategy._rootLen < 0) { 988 _oldSeek = _strategy._rootPos; 989 _oldBuf = d4_new t4_byte[512]; 990 _oldCurr = _oldLimit = _oldBuf; 991 992 t4_i32 n = FetchOldValue(); 993 d4_assert(n == 0); 994 n = FetchOldValue(); 995 d4_assert(n > 0); 996 997 c4_Bytes temp; 998 t4_byte *buf = temp.SetBuffer(n); 999 d4_dbgdef(int n2 = )OldRead(buf, n); 1000 d4_assert(n2 == n); 1001 1002 c4_String s = "[" + c4_String((const char*)buf, n) + "]"; 1003 const char *desc = s; 1004 1005 c4_Field *f = d4_new c4_Field(desc); 1006 d4_assert(! *desc); 1007 1008 //?_root->DefineRoot(); 1009 _root->Restructure(*f, false); 1010 1011 _root->OldPrepare(); 1012 1013 // don't touch data inside while converting the file 1014 if (_strategy.FileSize() >= 0) 1015 OccupySpace(1, _strategy.FileSize()); 1016 } else { 1017 walk.FetchBytes(0, walk.ColSize(), _rootWalk, true); 1018 if (_differ) 1019 _differ->GetRoot(_rootWalk); 1020 1021 // 2006-08-01: maintain stable-storage space usage on re-open 1022 OccupySpace(_strategy._rootPos, _strategy._rootLen); 1023 1024 // define and fill the root table 1025 const t4_byte *ptr = _rootWalk.Contents(); 1026 _root->Prepare(&ptr, true); 1027 d4_assert(ptr == _rootWalk.Contents() + _rootWalk.Size()); 1028 } 1029} 1030 1031t4_i32 c4_Persist::FetchOldValue() { 1032 d4_assert(_oldSeek >= 0); 1033 1034 if (_oldCurr == _oldLimit) { 1035 int n = OldRead(_oldBuf, 500); 1036 _oldLimit = _oldCurr + n; 1037 _oldBuf[n] = 0x80; // to force end 1038 } 1039 1040 const t4_byte *p = _oldCurr; 1041 t4_i32 value = c4_Column::PullValue(p); 1042 1043 if (p > _oldLimit) { 1044 int k = _oldLimit - _oldCurr; 1045 d4_assert(0 < k && k < 10); 1046 memcpy(_oldBuf, _oldCurr, k); 1047 1048 int n = OldRead(_oldBuf + k, 500); 1049 _oldCurr = _oldBuf + k; 1050 _oldLimit = _oldCurr + n; 1051 _oldBuf[n + k] = 0x80; // to force end 1052 1053 p = _oldCurr; 1054 value = c4_Column::PullValue(p); 1055 d4_assert(p <= _oldLimit); 1056 } 1057 1058 _oldCurr = p; 1059 return value; 1060} 1061 1062void c4_Persist::FetchOldLocation(c4_Column &col_) { 1063 d4_assert(_oldSeek >= 0); 1064 1065 t4_i32 sz = FetchOldValue(); 1066 if (sz > 0) 1067 col_.SetLocation(FetchOldValue(), sz); 1068} 1069 1070t4_i32 c4_Persist::FreeBytes(t4_i32 *bytes_) { 1071 return _space == 0 ? - 1: _space->FreeCounts(bytes_); 1072} 1073 1074int c4_Persist::OldRead(t4_byte *buf_, int len_) { 1075 d4_assert(_oldSeek >= 0); 1076 1077 t4_i32 newSeek = _oldSeek + _oldCurr - _oldLimit; 1078 int n = _strategy.DataRead(newSeek, buf_, len_); 1079 d4_assert(n > 0); 1080 _oldSeek = newSeek + n; 1081 _oldCurr = _oldLimit = _oldBuf; 1082 return n; 1083} 1084 1085c4_HandlerSeq *c4_Persist::Load(c4_Stream *stream_) { 1086 d4_assert(stream_ != 0); 1087 1088 c4_FileMark head; 1089 if (stream_->Read(&head, sizeof head) != sizeof head || !head.IsHeader()) 1090 return 0; 1091 // no data in file 1092 1093 //_oldStyle = head._data[3] == 0x80; 1094 d4_assert(!head.IsOldHeader()); 1095 1096 t4_i32 limit = head.Offset(); 1097 1098 c4_StreamStrategy *strat = d4_new c4_StreamStrategy(limit); 1099 strat->_bytesFlipped = head.IsFlipped(); 1100 strat->DataWrite(strat->FileSize() - strat->_baseOffset, &head, sizeof head); 1101 1102 while (strat->FileSize() - strat->_baseOffset < limit) { 1103 char buffer[4096]; 1104 int n = stream_->Read(buffer, sizeof buffer); 1105 d4_assert(n > 0); 1106 strat->DataWrite(strat->FileSize() - strat->_baseOffset, buffer, n); 1107 } 1108 1109 c4_Persist *pers = d4_new c4_Persist(*strat, true, 0); 1110 c4_HandlerSeq *seq = d4_new c4_HandlerSeq(pers); 1111 seq->DefineRoot(); 1112 pers->SetRoot(seq); 1113 1114 c4_Column walk(pers); 1115 if (!pers->LoadIt(walk)) { 1116 seq->IncRef(); 1117 seq->DecRef(); // a funny way to delete 1118 return 0; 1119 } 1120 1121 c4_Bytes tempWalk; 1122 walk.FetchBytes(0, walk.ColSize(), tempWalk, true); 1123 1124 const t4_byte *ptr = tempWalk.Contents(); 1125 seq->Prepare(&ptr, true); 1126 d4_assert(ptr == tempWalk.Contents() + tempWalk.Size()); 1127 1128 return seq; 1129} 1130 1131void c4_Persist::Save(c4_Stream *stream_, c4_HandlerSeq &root_) { 1132 d4_assert(stream_ != 0); 1133 1134 c4_StreamStrategy strat(stream_); 1135 1136 // 31-01-2002: streaming must adopt byte order of origin datafile 1137 c4_Persist *p = root_.Persist(); 1138 if (p != 0) 1139 strat._bytesFlipped = p->Strategy()._bytesFlipped; 1140 1141 c4_SaveContext ar(strat, true, 0, 0, 0); 1142 c4_Bytes tempWalk; 1143 ar.SaveIt(root_, 0, tempWalk); 1144} 1145 1146t4_i32 c4_Persist::LookupAside(int id_) { 1147 d4_assert(_differ != 0); 1148 1149 return _differ->BaseOfDiff(id_); 1150} 1151 1152void c4_Persist::ApplyAside(int id_, c4_Column &col_) { 1153 d4_assert(_differ != 0); 1154 1155 _differ->ApplyDiff(id_, col_); 1156} 1157 1158void c4_Persist::OccupySpace(t4_i32 pos_, t4_i32 len_) { 1159 d4_assert(_mode != 1 || _space != 0); 1160 1161 if (_space != 0) 1162 _space->Occupy(pos_, len_); 1163} 1164 1165///////////////////////////////////////////////////////////////////////////// 1166