1// persist.cpp --
2// $Id: persist.cpp 1230 2007-03-09 15:58:53Z jcw $
3// This is part of Metakit, the homepage is http://www.equi4.com/metakit.html
4
5/** @file
6 * Implementation of the main file management classes
7 */
8
9#include "header.h"
10#include "column.h"
11#include "persist.h"
12#include "handler.h"
13#include "store.h"
14#include "field.h"
15
16/////////////////////////////////////////////////////////////////////////////
17
// An 8-byte marker as stored in the datafile: either a file header
// (2-byte signature + type byte + filler + 32-bit offset) or a tail
// marker (0x80 tag + 24-bit length + 32-bit position), see the ctors.
class c4_FileMark {
    enum {
        kStorageFormat = 0x4C4A,  // b0 = 'J', b1 = <4C> (on Intel)
        kReverseFormat = 0x4A4C  // b0 = <4C>, b1 = 'J'
    };

    t4_byte _data[8]; // raw on-disk representation, always 8 bytes

  public:
    c4_FileMark(); // uninitialized, for reading a mark from disk
    c4_FileMark(t4_i32 pos_, bool flipped_, bool extend_); // header mark
    c4_FileMark(t4_i32 pos_, int len_); // tail (skip/commit) mark

    t4_i32 Offset()const; // offset stored in bytes 4..7 (big-endian)
    t4_i32 OldOffset()const; // same bytes, old reversed byte order

    bool IsHeader()const;
    bool IsOldHeader()const;
    bool IsFlipped()const; // true if written with reversed signature
};
38
39/////////////////////////////////////////////////////////////////////////////
40
41c4_FileMark::c4_FileMark() {
42  d4_assert(sizeof *this == 8);
43}
44
45c4_FileMark::c4_FileMark(t4_i32 pos_, bool flipped_, bool extend_) {
46  d4_assert(sizeof *this == 8);
47  *(short*)_data = flipped_ ? kReverseFormat : kStorageFormat;
48  _data[2] = extend_ ? 0x0A : 0x1A;
49  _data[3] = 0;
50  t4_byte *p = _data + 4;
51  for (int i = 24; i >= 0; i -= 8)
52    *p++ = (t4_byte)(pos_ >> i);
53  d4_assert(p == _data + sizeof _data);
54}
55
56c4_FileMark::c4_FileMark(t4_i32 pos_, int len_) {
57  d4_assert(sizeof *this == 8);
58  t4_byte *p = _data;
59  *p++ = 0x80;
60  for (int j = 16; j >= 0; j -= 8)
61    *p++ = (t4_byte)(len_ >> j);
62  for (int i = 24; i >= 0; i -= 8)
63    *p++ = (t4_byte)(pos_ >> i);
64  d4_assert(p == _data + sizeof _data);
65}
66
67t4_i32 c4_FileMark::Offset()const {
68  t4_i32 v = 0;
69  for (int i = 4; i < 8; ++i)
70    v = (v << 8) + _data[i];
71  return v;
72}
73
74t4_i32 c4_FileMark::OldOffset()const {
75  t4_i32 v = 0;
76  for (int i = 8; --i >= 4;)
77    v = (v << 8) + _data[i];
78  return v;
79}
80
81bool c4_FileMark::IsHeader()const {
82  return (_data[0] == 'J' || _data[0] == 'L') && (_data[0] ^ _data[1]) == ('J'
83    ^ 'L') && _data[2] == 0x1A;
84}
85
86bool c4_FileMark::IsOldHeader()const {
87  return IsHeader() && _data[3] == 0x80;
88}
89
90bool c4_FileMark::IsFlipped()const {
91  return *(short*)_data == kReverseFormat;
92
93}
94
95/////////////////////////////////////////////////////////////////////////////
96
// Free-space manager for the datafile, kept as a sorted vector of
// "walls" (see the explanation block below this declaration).
class c4_Allocator: public c4_DWordArray {
  public:
    c4_Allocator();

    // reset to a single free range starting at first_ (0 = fully occupied)
    void Initialize(t4_i32 first_ = 1);

    t4_i32 AllocationLimit()const; // start of the last (endless) free range

    t4_i32 Allocate(t4_i32 len_); // first-fit allocation
    void Occupy(t4_i32 pos_, t4_i32 len_); // mark a specific range as used
    void Release(t4_i32 pos_, t4_i32 len_); // return a range to the free list
    void Dump(const char *str_); // debug dump (only active with q4_CHECK)
    t4_i32 FreeCounts(t4_i32 *bytes_ = 0); // number (and size) of free ranges

  private:
    int Locate(t4_i32 pos_)const; // binary search for a wall position
    void InsertPair(int i_, t4_i32 from_, t4_i32 to_);
    t4_i32 ReduceFrags(int goal_, int sHi_, int sLo_);
};
116
117/////////////////////////////////////////////////////////////////////////////
118//
119//  Allocation of blocks is maintained in a separate data structure.
120//  There is no allocation overhead in the allocation arena itself.
121//
122//  A single vector of "walls" is maintained, sorted by position:
123//
124//    * Each transition between free and allocated is a single entry.
125//      The number of entries is <num-free-ranges> + <num-used-ranges>.
126//    * By definition, free areas start at the positions indicated
127//      by the entries on even indices. Allocated ones use odd entries.
128//    * There is an extra <0,0> free slot at the very beginning. This
129//      simplifies boundary conditions at the start of the arena.
130//    * Position zero cannot be allocated, first slot starts at 1.
131//
132//  Properties of this approach:
133//
134//    * No allocation overhead for adjacent allocated areas. On the
135//      other hand, the allocator does not know the size of used slots.
136//    * Alternate function allows marking a specific range as occupied.
137//    * Allocator can be initialized as either all free or all in-use.
138//    * Allocation info contains only integers, it could be stored.
139//    * To extend allocated slots: "occupy" extra bytes at the end.
140//    * Generic: can be used for memory, disk files, and array entries.
141
// Start out with everything free (first allocatable position is 1).
c4_Allocator::c4_Allocator() {
  Initialize();
}
145
146void c4_Allocator::Initialize(t4_i32 first_) {
147  SetSize(0, 1000); // empty, and growing in large chunks
148  Add(0); // fake block at start
149  Add(0); // ... only used to avoid merging
150
151  // if occupied, add a tiny free slot at the end, else add entire range
152  const t4_i32 kMaxInt = 0x7fffffff;
153  if (first_ == 0)
154    first_ = kMaxInt;
155
156  Add(first_); // start at a nicely aligned position
157  Add(kMaxInt); // ... there is no limit on file size
158}
159
160t4_i32 c4_Allocator::Allocate(t4_i32 len_) {
161  // zero arg is ok, it simply returns first allocatable position
162  for (int i = 2; i < GetSize(); i += 2)
163  if (GetAt(i + 1) >= GetAt(i) + len_) {
164    t4_i32 pos = GetAt(i);
165    if ((t4_i32)GetAt(i + 1) > pos + len_)
166      ElementAt(i) += len_;
167    else
168      RemoveAt(i, 2);
169    return pos;
170  }
171
172  d4_assert(0);
173  return 0; // not reached
174}
175
// Mark the range [pos_, pos_+len_) as allocated.  The range is expected
// to fall inside (or at the edge of) a free block; after ReduceFrags has
// dropped small segments the range may be unknown, which is silently
// accepted (see the comment below).
void c4_Allocator::Occupy(t4_i32 pos_, t4_i32 len_) {
  d4_assert(pos_ > 0);
  // note that zero size simply checks if there is any space to extend

  int i = Locate(pos_);
  d4_assert(0 < i && i < GetSize());

  if (i % 2) {
    // allocation is not at start of free block
    d4_assert((t4_i32)GetAt(i - 1) < pos_);

    if ((t4_i32)GetAt(i) == pos_ + len_)
    // allocate from end of free block
      SetAt(i, pos_);
    else
    // split free block in two
      InsertPair(i, pos_, pos_ + len_);
  } else if ((t4_i32)GetAt(i) == pos_)
  /*
  This side of the if used to be unconditional, but that was
  incorrect if ReduceFrags gets called (which only happens with
  severely fragmented files) - there are cases when allocation
  leads to an occupy request of which the free space list knows
  nothing about because it dropped small segments.  The solution
  is to silently "allow" such allocations - fixed 29-02-2000
  Thanks to Andrew Kuchling for his help in chasing this bug.
   */
   {
    // else extend tail of allocated area
    if ((t4_i32)GetAt(i + 1) > pos_ + len_)
      ElementAt(i) += len_;
    // move start of next free up
    else
      RemoveAt(i, 2);
    // remove this slot
  }
}
213
// Return the range [pos, pos+len) to the free list, merging with an
// adjacent free range on either side when possible.
void c4_Allocator::Release(t4_i32 pos, t4_i32 len) {
  int i = Locate(pos + len);
  d4_assert(0 < i && i < GetSize());
  d4_assert(i % 2 == 0); // don't release inside a free block

  if ((t4_i32)GetAt(i) == pos)
  // move start of next free down
    ElementAt(i) -= len;
  else if ((t4_i32)GetAt(i - 1) == pos)
  // move end of previous free up
    ElementAt(i - 1) += len;
  else
  // insert a new entry
    InsertPair(i, pos, pos + len);

  if (GetAt(i - 1) == GetAt(i))
  // merge if adjacent free
    RemoveAt(i - 1, 2);
}
233
234t4_i32 c4_Allocator::AllocationLimit()const {
235  d4_assert(GetSize() >= 2);
236
237  return GetAt(GetSize() - 2);
238}
239
// Binary search for pos among the wall positions.  Returns the index of
// an exact match, or else the index of the first wall beyond pos (the
// final line adjusts for the not-found case).
int c4_Allocator::Locate(t4_i32 pos)const {
  int lo = 0, hi = GetSize() - 1;

  while (lo < hi) {
    int i = (lo + hi) / 2;
    if (pos < (t4_i32)GetAt(i))
      hi = i - 1;
    else if (pos > (t4_i32)GetAt(i))
      lo = i + 1;
    else
      return i;
  }

  return lo < GetSize() && pos > (t4_i32)GetAt(lo) ? lo + 1: lo;
}
255
// Insert a <from_,to_> wall pair at index i_, keeping the vector sorted.
// A second allocation of an area already in use is silently ignored.
// When the vector grows too large, ReduceFrags trims small free ranges.
void c4_Allocator::InsertPair(int i_, t4_i32 from_, t4_i32 to_) {
  d4_assert(0 < i_);
  d4_assert(i_ < GetSize());

  d4_assert(from_ < to_);
  d4_assert((t4_i32)GetAt(i_ - 1) < from_);
  //!d4_assert(to_ < GetAt(i_));

  if (to_ >= (t4_i32)GetAt(i_))
    return ;
  // ignore 2nd allocation of used area

  InsertAt(i_, from_, 2);
  SetAt(i_ + 1, to_);

  // it's ok to have arrays up to some 30000 bytes
  if (GetSize() > 7500)
    ReduceFrags(5000, 12, 6);
}
275
// Compact the wall vector by dropping free ranges smaller than a
// threshold, which is progressively lowered (arena size >> shift for
// shift from sHi_ down to sLo_) until fewer than goal_ entries remain.
// Returns the number of free bytes lost by dropping those ranges.
t4_i32 c4_Allocator::ReduceFrags(int goal_, int sHi_, int sLo_) {
  // drastic fail-safe measure: remove small gaps if vec gets too long
  // this will cause some lost free space but avoids array overflow
  // the lost space will most probably be re-used after the next commit

  int limit = GetSize() - 2;
  t4_i32 loss = 0;

  // go through all entries and remove gaps under the given threshold
  for (int shift = sHi_; shift >= sLo_; --shift) {
    // the threshold is a fraction of the current size of the arena
    t4_i32 threshold = AllocationLimit() >> shift;
    if (threshold == 0)
      continue;

    int n = 2;
    for (int i = n; i < limit; i += 2)
    if ((t4_i32)GetAt(i + 1) - (t4_i32)GetAt(i) > threshold) {
      SetAt(n++, GetAt(i));
      SetAt(n++, GetAt(i + 1));
    } else
      loss += GetAt(i + 1) - GetAt(i);

    limit = n;

    // if (GetSize() < goal_) - suboptimal, fixed 29-02-2000
    if (limit < goal_)
      break;
    // got rid of enough entries, that's enough
  }

  // copy the final sentinel pair down and shrink the vector
  int n = GetSize() - 2;
  SetAt(limit++, GetAt(n++));
  SetAt(limit++, GetAt(n));
  SetSize(limit);

  return loss;
}
314
#if q4_CHECK
#include <stdio.h>

// Debug build: print all free ranges to stderr.
void c4_Allocator::Dump(const char *str_) {
  fprintf(stderr, "c4_Allocator::Dump, %d entries <%s>\n", GetSize(), str_);
  for (int i = 2; i < GetSize(); i += 2)
    fprintf(stderr, "  %10ld .. %ld\n", GetAt(i - 1), GetAt(i));
  fprintf(stderr, "END\n");
}

#else

// Release build: dumping is a no-op.
void c4_Allocator::Dump(const char *str_){}

#endif
330
331t4_i32 c4_Allocator::FreeCounts(t4_i32 *bytes_) {
332  if (bytes_ != 0) {
333    t4_i32 total = 0;
334    for (int i = 2; i < GetSize() - 2; i += 2)
335      total += GetAt(i + 1) - GetAt(i);
336    *bytes_ = total;
337  }
338  return GetSize() / 2-2;
339}
340
341/////////////////////////////////////////////////////////////////////////////
342
// Records changes as "diffs" in a separate aside storage instead of
// writing them into the main datafile (used by SetAside commit mode).
class c4_Differ {
  public:
    c4_Differ(c4_Storage &storage_);
    ~c4_Differ();

    int NewDiffID(); // reserve a new diff entry, returns its index
    void CreateDiff(int id_, c4_Column &col_); // snapshot a column's data
    t4_i32 BaseOfDiff(int id_); // original position of a diffed column
    void ApplyDiff(int id_, c4_Column &col_)const; // replay onto a column

    void GetRoot(c4_Bytes &buffer_); // root walk data of the latest diff

    c4_Storage _storage; // the aside storage holding the diffs
    c4_View _diffs;
    c4_View _temp; // scratch view while building one diff

  private:
    void AddEntry(t4_i32, t4_i32, const c4_Bytes &);

    c4_ViewProp pCols; //  column info:
    c4_IntProp pOrig; //    original position
    c4_ViewProp pDiff; //    difference chunks:
    c4_IntProp pKeep; //      offset
    c4_IntProp pResize; //      length
    c4_BytesProp pBytes; //      data
};
369
// Attach to the aside storage and (re)define the diff view layout.
c4_Differ::c4_Differ(c4_Storage &storage_): _storage(storage_), pCols("_C"),
  pOrig("_O"), pDiff("_D"), pKeep("_K"), pResize("_R"), pBytes("_B") {
  // weird names, to avoid clashing with existing ones (capitalization!)
  _diffs = _storage.GetAs("_C[_O:I,_D[_K:I,_R:I,_B:B]]");
}
375
// Detach the view before the aside storage member is destroyed.
c4_Differ::~c4_Differ() {
  _diffs = c4_View();
}
379
380void c4_Differ::AddEntry(t4_i32 off_, t4_i32 len_, const c4_Bytes &data_) {
381  int n = _temp.GetSize();
382  _temp.SetSize(n + 1);
383  c4_RowRef r = _temp[n];
384
385  pKeep(r) = (t4_i32)off_;
386  pResize(r) = (t4_i32)len_;
387  pBytes(r).SetData(data_);
388}
389
390int c4_Differ::NewDiffID() {
391  int n = _diffs.GetSize();
392  _diffs.SetSize(n + 1);
393  return n;
394}
395
// Store a snapshot of col_'s contents as diff entry id_.  The disabled
// #if 0 code implemented true incremental diffs against the mapped
// file; the active branch simply stores the full column data.
void c4_Differ::CreateDiff(int id_, c4_Column &col_) {
  _temp.SetSize(0);
#if 0
  t4_i32 offset = 0;
  t4_i32 savedOff = 0;
  t4_i32 savedLen = 0;

  c4_Strategy *strat = col_.Persist() != 0 ? &col_.Strategy(): 0;

  c4_ColIter iter(col_, 0, col_.ColSize());
  while (iter.Next()) {
    const t4_byte *p = iter.BufLoad();
    if (strat != 0 && strat->_mapStart != 0 && p >= strat->_mapStart && p -
      strat->_mapStart < strat->_dataSize) {
      t4_i32 nextOff = p - strat->_mapStart;
      if (savedLen == 0)
        savedOff = nextOff;
      if (nextOff == savedOff + savedLen) {
        savedLen += iter.BufLen();
        continue;
      }

      if (savedLen > 0)
        AddEntry(savedOff, savedLen, c4_Bytes());

      savedOff = nextOff;
      savedLen = iter.BufLen();
    } else {
      AddEntry(savedOff, savedLen, c4_Bytes(p, iter.BufLen()));
      savedLen = 0;
    }

    offset += iter.BufLen();
  }

  c4_View diff = pDiff(_diffs[id_]);
  if (_temp.GetSize() != diff.GetSize() || _temp != diff)
#else
    c4_Bytes t1;
  const t4_byte *p = col_.FetchBytes(0, col_.ColSize(), t1, false);
  AddEntry(0, 0, c4_Bytes(p, col_.ColSize()));
#endif
  pDiff(_diffs[id_]) = _temp;

  // remember where the column originally lived in the main file
  pOrig(_diffs[id_]) = col_.Position();
}
442
443t4_i32 c4_Differ::BaseOfDiff(int id_) {
444  d4_assert(0 <= id_ && id_ < _diffs.GetSize());
445
446  return pOrig(_diffs[id_]);
447}
448
// Replay diff entry id_ onto col_: for each chunk, skip pKeep bytes,
// resize by pResize, then overwrite with the stored data.
void c4_Differ::ApplyDiff(int id_, c4_Column &col_)const {
  d4_assert(0 <= id_ && id_ < _diffs.GetSize());

  c4_View diff = pDiff(_diffs[id_]);
  t4_i32 offset = 0;

  for (int n = 0; n < diff.GetSize(); ++n) {
    c4_RowRef row(diff[n]);
    offset += pKeep(row);

    c4_Bytes data;
    pBytes(row).GetData(data);

    // the following code is a lot like c4_MemoRef::Modify
    const t4_i32 change = pResize(row);
    if (change < 0)
      col_.Shrink(offset,  - change);
    else if (change > 0)
      col_.Grow(offset, change);

    col_.StoreBytes(offset, data);
    offset += data.Size();
  }

  // drop any leftover bytes beyond the replayed data
  if (offset > col_.ColSize())
    col_.Shrink(offset, offset - col_.ColSize());
}
476
477void c4_Differ::GetRoot(c4_Bytes &buffer_) {
478  int last = _diffs.GetSize() - 1;
479  if (last >= 0) {
480    c4_Bytes temp;
481    c4_View diff = pDiff(_diffs[last]);
482    if (diff.GetSize() > 0)
483      pBytes(diff[0]).GetData(buffer_);
484  }
485}
486
487/////////////////////////////////////////////////////////////////////////////
488
// Set up a save context.  When no allocator is supplied a private one
// is created (and owned via _cleanup); in commit-extend mode (_mode 1)
// the "next" allocation state is tracked in a separate allocator.
c4_SaveContext::c4_SaveContext(c4_Strategy &strategy_, bool fullScan_, int
  mode_, c4_Differ *differ_, c4_Allocator *space_): _strategy(strategy_), _walk
  (0), _differ(differ_), _space(space_), _cleanup(0), _nextSpace(0), _preflight
  (true), _fullScan(fullScan_), _mode(mode_), _nextPosIndex(0), _bufPtr(_buffer)
  , _curr(_buffer), _limit(_buffer) {
  if (_space == 0)
    _space = _cleanup = d4_new c4_Allocator;

  _nextSpace = _mode == 1 ? d4_new c4_Allocator: _space;
}
499
// Delete only the allocators this context owns (_cleanup, and the
// separate next-space allocator unless it aliases _space).
c4_SaveContext::~c4_SaveContext() {
  delete _cleanup;
  if (_nextSpace != _space)
    delete _nextSpace;
}
505
// True if the target datafile stores values in reversed byte order.
bool c4_SaveContext::IsFlipped()const {
  return _strategy._bytesFlipped;
}
509
// True when doing a full serialization (e.g. saving to a stream).
bool c4_SaveContext::Serializing()const {
  return _fullScan;
}
513
514void c4_SaveContext::AllocDump(const char *str_, bool next_) {
515  c4_Allocator *ap = next_ ? _nextSpace : _space;
516  if (ap != 0)
517    ap->Dump(str_);
518}
519
520void c4_SaveContext::FlushBuffer() {
521  int n = _curr - _bufPtr;
522  if (_walk != 0 && n > 0) {
523    t4_i32 end = _walk->ColSize();
524    _walk->Grow(end, n);
525    _walk->StoreBytes(end, c4_Bytes(_bufPtr, n));
526  }
527
528  _curr = _bufPtr = _buffer;
529  _limit = _buffer + sizeof _buffer;
530}
531
532c4_Column *c4_SaveContext::SetWalkBuffer(c4_Column *col_) {
533  FlushBuffer();
534
535  c4_Column *prev = _walk;
536  _walk = col_;
537  return prev;
538}
539
540void c4_SaveContext::Write(const void *buf_, int len_) {
541  // use buffering if possible
542  if (_curr + len_ <= _limit) {
543    memcpy(_curr, buf_, len_);
544    _curr += len_;
545  } else {
546    FlushBuffer();
547    _bufPtr = (t4_byte*)buf_; // also loses const
548    _curr = _limit = _bufPtr + len_;
549    FlushBuffer();
550  }
551}
552
// Append one variable-length encoded integer to the walk buffer.
// A no-op when no walk column is active.
void c4_SaveContext::StoreValue(t4_i32 v_) {
  if (_walk == 0)
    return ;

  // an encoded value needs at most 10 bytes, make room first
  if (_curr + 10 >= _limit)
    FlushBuffer();

  d4_assert(_curr + 10 < _limit);
  c4_Column::PushValue(_curr, v_);
}
563
// Perform a two-pass commit of the entire storage rooted at root_:
// pass one (preflight) allocates file space for changed columns and
// builds the "walk" structure; pass two writes columns, tail markers
// and finally the header, in an order chosen so the file stays valid
// at every intermediate step.  rootWalk_ is updated to the new walk
// data; on success *spacePtr_ may be swapped for the next allocator.
void c4_SaveContext::SaveIt(c4_HandlerSeq &root_, c4_Allocator **spacePtr_,
  c4_Bytes &rootWalk_) {
  d4_assert(_space != 0);

  const t4_i32 size = _strategy.FileSize();
  if (_strategy._failure != 0)
    return ;

  const t4_i32 end = _fullScan ? 0 : size - _strategy._baseOffset;

  if (_differ == 0) {
    if (_mode != 1)
      _space->Initialize();

    // don't allocate anything inside the file in extend mode
    if (_mode == 2 && end > 0) {
      _space->Occupy(1, end - 1);
      _nextSpace->Occupy(1, end - 1);
    }

    // the header is always reserved
    _space->Occupy(1, 7);
    _nextSpace->Occupy(1, 7);

    // reserve the tail marker areas of the existing file
    if (end > 0) {
      d4_assert(end >= 16);
      _space->Occupy(end - 16, 16);
      _nextSpace->Occupy(end - 16, 16);
      _space->Occupy(end, 8);
      _nextSpace->Occupy(end, 8);
    }
  }

  //AllocDump("a1", false);
  //AllocDump("a2", true);

  // first pass allocates columns and constructs shallow walks
  c4_Column walk(root_.Persist());
  SetWalkBuffer(&walk);
  CommitSequence(root_, true);
  SetWalkBuffer(0);
  CommitColumn(walk);

  c4_Bytes tempWalk;
  walk.FetchBytes(0, walk.ColSize(), tempWalk, true);

  t4_i32 limit = _nextSpace->AllocationLimit();
  d4_assert(limit >= 8 || _differ != 0);

  if (limit < 0) {
    // 2006-01-12 #2: catch file size exceeding 2 Gb
    _strategy._failure =  - 1; // unusual non-zero value flags this case
    return ;
  }

  bool changed = _fullScan || tempWalk != rootWalk_;

  rootWalk_ = c4_Bytes(tempWalk.Contents(), tempWalk.Size(), true);

  _preflight = false;

  // special-case to avoid saving data if file is logically empty
  // in that case, the data is 0x80 0x81 0x80 (plus the header)
  if (!_fullScan && limit <= 11 && _differ == 0) {
    _space->Initialize();
    _nextSpace->Initialize();
    changed = false;
  }

  if (!changed)
    return ;

  //AllocDump("b1", false);
  //AllocDump("b2", true);

  // aside mode: record the new walk as a diff and stop here
  if (_differ != 0) {
    int n = _differ->NewDiffID();
    _differ->CreateDiff(n, walk);
    return ;
  }

  d4_assert(_mode != 0 || _fullScan);

  // this is the place where writing may start

  // figure out where the new file ends and write a skip tail there
  t4_i32 end0 = end;

  // true if the file need not be extended due to internal free space
  bool inPlace = end0 == limit - 8;
  if (inPlace) {
    d4_assert(!_fullScan);
    _space->Release(end0, 8);
    _nextSpace->Release(end0, 8);
    end0 -= 16; // overwrite existing tail markers
  } else {
    /* 18-11-2005 write new end marker and flush it before *anything* else! */
    if (!_fullScan && end0 < limit) {
      c4_FileMark mark1(limit, 0);
      _strategy.DataWrite(limit, &mark1, sizeof mark1);
      _strategy.DataCommit(0);
      if (_strategy._failure != 0)
        return ;
    }

    c4_FileMark head(limit + 16-end, _strategy._bytesFlipped, end > 0);
    _strategy.DataWrite(end, &head, sizeof head);

    if (end0 < limit)
      end0 = limit;
    // create a gap
  }

  t4_i32 end1 = end0 + 8;
  t4_i32 end2 = end1 + 8;

  if (!_fullScan && !inPlace) {
    c4_FileMark mark1(end0, 0);
    _strategy.DataWrite(end0, &mark1, sizeof mark1);
#if q4_WIN32
    /* March 8, 2002
     * On at least NT4 with NTFS, extending a file can cause it to be
     * rounded up further than expected.  To prevent creating a bad
     * file (since the file does then not end with a marker), the
     * workaround it so simply accept the new end instead and rewrite.
     * Note that between these two writes, the file is in a bad state.
     */
    t4_i32 realend = _strategy.FileSize() - _strategy._baseOffset;
    if (realend > end1) {
      end0 = limit = realend - 8;
      end1 = realend;
      end2 = realend + 8;
      c4_FileMark mark1a(end0, 0);
      _strategy.DataWrite(end0, &mark1a, sizeof mark1a);
    }
#endif
    d4_assert(_strategy.FileSize() == _strategy._baseOffset + end1);
  }

  _space->Occupy(end0, 16);
  _nextSpace->Occupy(end0, 16);

  // strategy.DataCommit(0); // may be needed, need more info on how FS's work
  // but this would need more work, since we can't adjust file-mapping here

  // second pass saves the columns and structure to disk
  CommitSequence(root_, true); // writes changed columns
  CommitColumn(walk);

  //! d4_assert(_curr == 0);
  d4_assert(_nextPosIndex == _newPositions.GetSize());

  if (_fullScan) {
    c4_FileMark mark1(limit, 0);
    _strategy.DataWrite(_strategy.FileSize() - _strategy._baseOffset,  &mark1,
      sizeof mark1);

    c4_FileMark mark2(limit - walk.ColSize(), walk.ColSize());
    _strategy.DataWrite(_strategy.FileSize() - _strategy._baseOffset,  &mark2,
      sizeof mark2);

    return ;
  }

  if (inPlace)
    d4_assert(_strategy.FileSize() == _strategy._baseOffset + end2);
  else {
    // make sure the allocated size hasn't changed
    d4_assert(_nextSpace->AllocationLimit() == limit + 16);
    d4_assert(end0 >= limit);
    d4_assert(_strategy.FileSize() - _strategy._baseOffset == end1);
  }

  if (walk.Position() == 0 || _strategy._failure != 0)
    return ;

  // flush data before writing the commit marker that references it
  _strategy.DataCommit(0);

  c4_FileMark mark2(walk.Position(), walk.ColSize());
  _strategy.DataWrite(end1, &mark2, sizeof mark2);
  d4_assert(_strategy.FileSize() - _strategy._baseOffset == end2);

  // do not alter the file header in extend mode, unless it is new
  if (!_fullScan && (_mode == 1 || end == 0)) {
    _strategy.DataCommit(0);

    c4_FileMark head(end2, _strategy._bytesFlipped, false);
    d4_assert(head.IsHeader());
    _strategy.DataWrite(0, &head, sizeof head);

    // if the file became smaller, we could shrink it
    if (limit + 16 < end0) {
      /*
      Not yet, this depends on the strategy class being able to truncate, but
      there is no way to find out whether it does (the solution is to write tail
      markers in such a way that the file won't grow unnecessarily if it doesn't).

      The logic will probably be:

       * write new skip + commit "tails" at limit (no visible effect on file)
       * overwrite commit tail at end  with a skip to this new one (equivalent)
       * replace header with one pointing to that internal new one (equivalent)
       * flush (now the file is valid both truncated and not-yet-truncated

      end = limit;
       */
    }
  }

  // if using memory mapped files, make sure the map is no longer in use
  if (_strategy._mapStart != 0)
    root_.UnmappedAll();

  // commit and tell strategy object what the new file size is, this
  // may be smaller now, if old data at the end is no longer referenced
  _strategy.DataCommit(end2);

  d4_assert(_strategy.FileSize() - _strategy._baseOffset == end2);

  // hand the next allocation state back to the caller
  if (spacePtr_ != 0 && _space != _nextSpace) {
    d4_assert(*spacePtr_ == _space);
    delete  *spacePtr_;
    *spacePtr_ = _nextSpace;
    _nextSpace = 0;
  }
}
790
// Commit one column: store its size and (if non-empty) its position in
// the walk.  In preflight, space is allocated and the position queued;
// in the second pass the queued position is used to actually save.
// Returns true when the column needed saving.
bool c4_SaveContext::CommitColumn(c4_Column &col_) {
  bool changed = col_.IsDirty() || _fullScan;

  t4_i32 sz = col_.ColSize();
  StoreValue(sz);
  if (sz > 0) {
    t4_i32 pos = col_.Position();

    if (_differ) {
      if (changed) {
        // a negative position encodes an existing diff id (~pos)
        int n = pos < 0 ? ~pos: _differ->NewDiffID();
        _differ->CreateDiff(n, col_);

        d4_assert(n >= 0);
        pos = ~n;
      }
    } else if (_preflight) {
      if (changed)
        pos = _space->Allocate(sz);

      _nextSpace->Occupy(pos, sz);
      _newPositions.Add(pos);
    } else {
      // second pass: use the position chosen during preflight
      pos = _newPositions.GetAt(_nextPosIndex++);

      if (changed)
        col_.SaveNow(_strategy, pos);

      if (!_fullScan)
        col_.SetLocation(pos, sz);
    }

    StoreValue(pos);
  }

  return changed;
}
828
829void c4_SaveContext::CommitSequence(c4_HandlerSeq &seq_, bool selfDesc_) {
830  StoreValue(0); // sias prefix
831
832  if (selfDesc_) {
833    c4_String desc = seq_.Description();
834    int k = desc.GetLength();
835    StoreValue(k);
836    Write((const char*)desc, k);
837  }
838
839  StoreValue(seq_.NumRows());
840  if (seq_.NumRows() > 0)
841    for (int i = 0; i < seq_.NumFields(); ++i)
842      seq_.NthHandler(i).Commit(*this);
843}
844
845/////////////////////////////////////////////////////////////////////////////
846
// used for on-the-fly conversion of old-format datafiles
// NOTE(review): these file-scope globals share their names with the
// c4_Persist members initialized in its constructor below; nothing in
// this file reads or writes them, so they appear to be unused leftovers
// -- confirm against the rest of the project before removing.
t4_byte *_oldBuf;
const t4_byte *_oldCurr;
const t4_byte *_oldLimit;
t4_i32 _oldSeek;
852
853
// Construct persistence state; only commit-extend mode (_mode 1) keeps
// a space allocator around between commits.  _oldSeek of -1 means the
// old-format conversion machinery is inactive.
c4_Persist::c4_Persist(c4_Strategy &strategy_, bool owned_, int mode_): _space
  (0), _strategy(strategy_), _root(0), _differ(0), _fCommit(0), _mode(mode_),
  _owned(owned_), _oldBuf(0), _oldCurr(0), _oldLimit(0), _oldSeek( - 1) {
  if (_mode == 1)
    _space = d4_new c4_Allocator;
}
860
861c4_Persist::~c4_Persist() {
862  delete _differ;
863
864  if (_owned) {
865    if (_root != 0)
866      _root->UnmappedAll();
867    delete  &_strategy;
868  }
869
870  delete _space;
871
872  if (_oldBuf != 0)
873    delete [] _oldBuf;
874}
875
// The root handler sequence; must have been set via SetRoot first.
c4_HandlerSeq &c4_Persist::Root()const {
  d4_assert(_root != 0);
  return  *_root;
}
880
// Install the root handler sequence (may only be done once).
void c4_Persist::SetRoot(c4_HandlerSeq *root_) {
  d4_assert(_root == 0);
  _root = root_;
}
885
// Access the underlying file strategy object.
c4_Strategy &c4_Persist::Strategy()const {
  return _strategy;
}
889
890bool c4_Persist::AutoCommit(bool flag_) {
891  bool prev = _fCommit != 0;
892  if (flag_)
893    _fCommit = &c4_Persist::Commit;
894  else
895    _fCommit = 0;
896  return prev;
897}
898
// Invoke the auto-commit member function pointer, if enabled.
void c4_Persist::DoAutoCommit() {
  if (_fCommit != 0)
    (this->*_fCommit)(false);
}
903
// Attach an aside storage to collect changes as diffs, then reload the
// current state so the aside contents take effect.
bool c4_Persist::SetAside(c4_Storage &aside_) {
  delete _differ;
  _differ = d4_new c4_Differ(aside_);
  Rollback(false);
  return true; //! true if the generation matches
}
910
911c4_Storage *c4_Persist::GetAside()const {
912  return _differ != 0 ? &_differ->_storage: 0;
913}
914
// Commit all changes; full_ forces a commit to the main file even when
// an aside differ is active.  Returns true on success.
bool c4_Persist::Commit(bool full_) {
  // 1-Mar-1999, new semantics! return success status of commits
  _strategy._failure = 0;

  if (!_strategy.IsValid())
    return false;

  if (_mode == 0 && (_differ == 0 || full_))
  // can't commit to r/o file
    return false;
  // note that _strategy._failure is *zero* in this case

  c4_SaveContext ar(_strategy, false, _mode, full_ ? 0 : _differ, _space);

  // get rid of temp properties which still use the datafile
  if (_mode == 1)
    _root->DetachFromStorage(false);

  // 30-3-2001: moved down, fixes "crash every 2nd call of mkdemo/dbg"
  ar.SaveIt(*_root, &_space, _rootWalk);
  return _strategy._failure == 0;
}
937
// Discard all uncommitted changes by rebuilding the root from the file;
// full_ also drops the aside differ.  Returns true on success.
bool c4_Persist::Rollback(bool full_) {
  _root->DetachFromParent();
  _root->DetachFromStorage(true);
  _root = 0;

  if (_space != 0)
    _space->Initialize();

  // build a fresh empty root, then reload it from stable storage
  c4_HandlerSeq *seq = d4_new c4_HandlerSeq(this);
  seq->DefineRoot();
  SetRoot(seq);

  if (full_) {
    delete _differ;
    _differ = 0;
  }

  LoadAll();

  return _strategy._failure == 0;
}
959
// Locate the root walk data in the file and point walk_ at it.
// Returns false when the file is empty (or on failure).
bool c4_Persist::LoadIt(c4_Column &walk_) {
  t4_i32 limit = _strategy.FileSize();
  if (_strategy._failure != 0)
    return false;

  if (_strategy.EndOfData(limit) < 0) {
    _strategy.SetBase(limit);
    d4_assert(_strategy._failure == 0); // file is ok, but empty
    return false;
  }

  if (_strategy._rootLen > 0)
    walk_.SetLocation(_strategy._rootPos, _strategy._rootLen);

  // if the file size has increased, we must remap
  if (_strategy._mapStart != 0 && _strategy.FileSize() > _strategy._baseOffset
    + _strategy._dataSize)
    _strategy.ResetFileMapping();

  return true;
}
981
// Load the entire storage structure.  A negative root length signals an
// old-format datafile, which is converted on the fly via the _old*
// buffer machinery; otherwise the root walk is fetched and replayed.
void c4_Persist::LoadAll() {
  c4_Column walk(this);
  if (!LoadIt(walk))
    return ;

  if (_strategy._rootLen < 0) {
    // old format: set up the conversion read buffer
    _oldSeek = _strategy._rootPos;
    _oldBuf = d4_new t4_byte[512];
    _oldCurr = _oldLimit = _oldBuf;

    t4_i32 n = FetchOldValue();
    d4_assert(n == 0);
    n = FetchOldValue();
    d4_assert(n > 0);

    // read the structure description string
    c4_Bytes temp;
    t4_byte *buf = temp.SetBuffer(n);
    d4_dbgdef(int n2 = )OldRead(buf, n);
    d4_assert(n2 == n);

    c4_String s = "[" + c4_String((const char*)buf, n) + "]";
    const char *desc = s;

    c4_Field *f = d4_new c4_Field(desc);
    d4_assert(! *desc);

    //?_root->DefineRoot();
    _root->Restructure(*f, false);

    _root->OldPrepare();

    // don't touch data inside while converting the file
    if (_strategy.FileSize() >= 0)
      OccupySpace(1, _strategy.FileSize());
  } else {
    walk.FetchBytes(0, walk.ColSize(), _rootWalk, true);
    if (_differ)
      _differ->GetRoot(_rootWalk);

    // 2006-08-01: maintain stable-storage space usage on re-open
    OccupySpace(_strategy._rootPos, _strategy._rootLen);

    // define and fill the root table
    const t4_byte *ptr = _rootWalk.Contents();
    _root->Prepare(&ptr, true);
    d4_assert(ptr == _rootWalk.Contents() + _rootWalk.Size());
  }
}
1030
// Decode the next variable-length integer from the old-format data,
// refilling the 512-byte read buffer as needed.  When a value straddles
// the buffer end, the partial bytes are moved to the front and decoding
// is retried after the refill.
t4_i32 c4_Persist::FetchOldValue() {
  d4_assert(_oldSeek >= 0);

  if (_oldCurr == _oldLimit) {
    int n = OldRead(_oldBuf, 500);
    _oldLimit = _oldCurr + n;
    _oldBuf[n] = 0x80; // to force end
  }

  const t4_byte *p = _oldCurr;
  t4_i32 value = c4_Column::PullValue(p);

  if (p > _oldLimit) {
    // the value ran off the end of the buffer: salvage the partial
    // bytes, refill, and decode again
    int k = _oldLimit - _oldCurr;
    d4_assert(0 < k && k < 10);
    memcpy(_oldBuf, _oldCurr, k);

    int n = OldRead(_oldBuf + k, 500);
    _oldCurr = _oldBuf + k;
    _oldLimit = _oldCurr + n;
    _oldBuf[n + k] = 0x80; // to force end

    p = _oldCurr;
    value = c4_Column::PullValue(p);
    d4_assert(p <= _oldLimit);
  }

  _oldCurr = p;
  return value;
}
1061
1062void c4_Persist::FetchOldLocation(c4_Column &col_) {
1063  d4_assert(_oldSeek >= 0);
1064
1065  t4_i32 sz = FetchOldValue();
1066  if (sz > 0)
1067    col_.SetLocation(FetchOldValue(), sz);
1068}
1069
1070t4_i32 c4_Persist::FreeBytes(t4_i32 *bytes_) {
1071  return _space == 0 ?  - 1: _space->FreeCounts(bytes_);
1072}
1073
// Read up to len_ bytes of old-format data into buf_, adjusting the
// seek position for any bytes still unconsumed in the buffer, and
// reset the buffer pointers.  Returns the number of bytes read.
int c4_Persist::OldRead(t4_byte *buf_, int len_) {
  d4_assert(_oldSeek >= 0);

  t4_i32 newSeek = _oldSeek + _oldCurr - _oldLimit;
  int n = _strategy.DataRead(newSeek, buf_, len_);
  d4_assert(n > 0);
  _oldSeek = newSeek + n;
  _oldCurr = _oldLimit = _oldBuf;
  return n;
}
1084
// Load a storage from a stream: verify the header, copy the stream's
// contents into an in-memory strategy, then build and fill the root
// handler sequence.  Returns 0 when the stream holds no valid data.
c4_HandlerSeq *c4_Persist::Load(c4_Stream *stream_) {
  d4_assert(stream_ != 0);

  c4_FileMark head;
  if (stream_->Read(&head, sizeof head) != sizeof head || !head.IsHeader())
    return 0;
  // no data in file

  //_oldStyle = head._data[3] == 0x80;
  d4_assert(!head.IsOldHeader());

  t4_i32 limit = head.Offset();

  c4_StreamStrategy *strat = d4_new c4_StreamStrategy(limit);
  strat->_bytesFlipped = head.IsFlipped();
  strat->DataWrite(strat->FileSize() - strat->_baseOffset, &head, sizeof head);

  // copy the remainder of the stream in 4 Kb chunks
  while (strat->FileSize() - strat->_baseOffset < limit) {
    char buffer[4096];
    int n = stream_->Read(buffer, sizeof buffer);
    d4_assert(n > 0);
    strat->DataWrite(strat->FileSize() - strat->_baseOffset, buffer, n);
  }

  c4_Persist *pers = d4_new c4_Persist(*strat, true, 0);
  c4_HandlerSeq *seq = d4_new c4_HandlerSeq(pers);
  seq->DefineRoot();
  pers->SetRoot(seq);

  c4_Column walk(pers);
  if (!pers->LoadIt(walk)) {
    seq->IncRef();
    seq->DecRef(); // a funny way to delete
    return 0;
  }

  c4_Bytes tempWalk;
  walk.FetchBytes(0, walk.ColSize(), tempWalk, true);

  const t4_byte *ptr = tempWalk.Contents();
  seq->Prepare(&ptr, true);
  d4_assert(ptr == tempWalk.Contents() + tempWalk.Size());

  return seq;
}
1130
// Serialize the storage rooted at root_ to a stream, via a full-scan
// save context writing through a stream strategy.
void c4_Persist::Save(c4_Stream *stream_, c4_HandlerSeq &root_) {
  d4_assert(stream_ != 0);

  c4_StreamStrategy strat(stream_);

  // 31-01-2002: streaming must adopt byte order of origin datafile
  c4_Persist *p = root_.Persist();
  if (p != 0)
    strat._bytesFlipped = p->Strategy()._bytesFlipped;

  c4_SaveContext ar(strat, true, 0, 0, 0);
  c4_Bytes tempWalk;
  ar.SaveIt(root_, 0, tempWalk);
}
1145
// Original file position of the column behind aside diff entry id_.
t4_i32 c4_Persist::LookupAside(int id_) {
  d4_assert(_differ != 0);

  return _differ->BaseOfDiff(id_);
}
1151
// Replay aside diff entry id_ onto the given column.
void c4_Persist::ApplyAside(int id_, c4_Column &col_) {
  d4_assert(_differ != 0);

  _differ->ApplyDiff(id_, col_);
}
1157
// Mark a file range as in use, but only when a space allocator is
// active (commit-extend mode always has one).
void c4_Persist::OccupySpace(t4_i32 pos_, t4_i32 len_) {
  d4_assert(_mode != 1 || _space != 0);

  if (_space != 0)
    _space->Occupy(pos_, len_);
}
1164
1165/////////////////////////////////////////////////////////////////////////////
1166