1// remap.cpp --
2// $Id: remap.cpp 1230 2007-03-09 15:58:53Z jcw $
3// This is part of Metakit, the homepage is http://www.equi4.com/metakit.html
4
5/** @file
6 * Mapping and remapping custom viewers
7 */
8
9#include "header.h"
10#include "remap.h"
11#include "handler.h"
12
13/////////////////////////////////////////////////////////////////////////////
14
15class c4_ReadOnlyViewer: public c4_CustomViewer {
16    c4_View _base;
17
18  public:
19    c4_ReadOnlyViewer(c4_Sequence &seq_): _base(&seq_){}
20    virtual ~c4_ReadOnlyViewer(){}
21
22    virtual c4_View GetTemplate() {
23        return _base.Clone();
24    }
25    virtual int GetSize() {
26        return _base.GetSize();
27    }
28
29    virtual int Lookup(c4_Cursor key_, int &count_) {
30        int pos = 0;
31        count_ = _base.GetSize();
32        return _base.RestrictSearch(*key_, pos, count_);
33    }
34
35    virtual bool GetItem(int row_, int col_, c4_Bytes &buf_) {
36        return _base.GetItem(row_, col_, buf_);
37    }
38};
39
40/////////////////////////////////////////////////////////////////////////////
41
42class c4_HashViewer: public c4_CustomViewer {
43    c4_View _base;
44    c4_View _map;
45    int _numKeys;
46
47    c4_IntProp _pHash;
48    c4_IntProp _pRow;
49
50    bool KeySame(int row_, c4_Cursor cursor_)const;
51    t4_i32 CalcHash(c4_Cursor cursor_)const;
52    int LookDict(t4_i32 hash_, c4_Cursor cursor_)const;
53    void InsertDict(int row_);
54    void RemoveDict(int pos_);
55    bool DictResize(int minused);
56
57    int Row(int i_)const {
58        return _pRow(_map[i_]);
59    }
60    int Hash(int i_)const {
61        return _pHash(_map[i_]);
62    }
63
64    void SetRow(int i_, int v_) {
65        _pRow(_map[i_]) = v_;
66    }
67    void SetHash(int i_, int v_) {
68        _pHash(_map[i_]) = v_;
69    }
70
71    bool IsUnused(int)const;
72    bool IsDummy(int)const;
73    bool IsActive(int i_)const {
74        return Row(i_) >= 0;
75    }
76
77    int GetPoly()const;
78    void SetPoly(int v_);
79    int GetSpare()const;
80    void SetSpare(int v_);
81
82  public:
83    c4_HashViewer(c4_Sequence &seq_, int numKeys_, c4_Sequence *map_ = 0);
84    virtual ~c4_HashViewer();
85
86    virtual c4_View GetTemplate();
87    virtual int GetSize();
88    virtual int Lookup(c4_Cursor key_, int &count_);
89    virtual bool GetItem(int row_, int col_, c4_Bytes &buf_);
90    virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_);
91    virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1);
92    virtual bool RemoveRows(int pos_, int count_ = 1);
93};
94
95/////////////////////////////////////////////////////////////////////////////
96// The following contains code derived froms Python's dictionaries, hence:
97//  Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
98//  The Netherlands.
99// Reduced and turned into a fast C++ class by Christian Tismer, hence:
100//  Copyright 1999 by Christian Tismer.
101// Vectorized and reorganized further by Jean-Claude Wippler.
102/////////////////////////////////////////////////////////////////////////////
103
104//  Table of irreducible polynomials to efficiently cycle through
105//  GF(2^n)-{0}, 2<=n<=30.
106
107static long s_polys[] =  {
108  4+3, 8+3, 16+3, 32+5, 64+3, 128+3, 256+29, 512+17, 1024+9, 2048+5, 4096+83,
109    8192+27, 16384+43, 32768+3, 65536+45, 131072+9, 262144+39, 524288+39,
110    1048576+9, 2097152+5, 4194304+3, 8388608+33, 16777216+27, 33554432+9,
111    67108864+71, 134217728+39, 268435456+9, 536870912+5, 1073741824+83, 0
112};
113
114/////////////////////////////////////////////////////////////////////////////
115
116c4_HashViewer::c4_HashViewer(c4_Sequence &seq_, int numKeys_, c4_Sequence *map_)
117  : _base(&seq_), _map(map_), _numKeys(numKeys_), _pHash("_H"), _pRow("_R") {
118  if (_map.GetSize() == 0)
119    _map.SetSize(1);
120
121  int poly = GetPoly();
122  if (poly == 0 || _map.GetSize() <= _base.GetSize())
123    DictResize(_base.GetSize());
124}
125
126c4_HashViewer::~c4_HashViewer(){}
127
128bool c4_HashViewer::IsUnused(int row_)const {
129  c4_RowRef r = _map[row_];
130  return _pRow(r) < 0 && _pHash(r) == 0;
131}
132
133bool c4_HashViewer::IsDummy(int row_)const {
134  c4_RowRef r = _map[row_];
135  return _pRow(r) < 0 && _pHash(r) < 0;
136}
137
138int c4_HashViewer::GetPoly()const {
139  return Hash(_map.GetSize() - 1);
140}
141
142void c4_HashViewer::SetPoly(int v_) {
143  SetHash(_map.GetSize() - 1, v_);
144}
145
146int c4_HashViewer::GetSpare()const {
147  return Row(_map.GetSize() - 1);
148}
149
150void c4_HashViewer::SetSpare(int v_) {
151  SetRow(_map.GetSize() - 1, v_);
152}
153
154bool c4_HashViewer::KeySame(int row_, c4_Cursor cursor_)const {
155  for (int i = 0; i < _numKeys; ++i) {
156    c4_Bytes buffer;
157    _base.GetItem(row_, i, buffer);
158
159    c4_Handler &h = cursor_._seq->NthHandler(i);
160    if (h.Compare(cursor_._index, buffer) != 0)
161      return false;
162  }
163
164  return true;
165}
166
167/// Create mapped view which is uses a second view for hashing
168t4_i32 c4_HashViewer::CalcHash(c4_Cursor cursor_)const {
169  c4_Bytes buffer, buf2;
170  const t4_i32 endian = 0x03020100;
171  t4_i32 hash = 0;
172
173  for (int i = 0; i < _numKeys; ++i) {
174    c4_Handler &h = cursor_._seq->NthHandler(i);
175    cursor_._seq->Get(cursor_._index, h.PropId(), buffer);
176
177    // this code borrows from Python's stringobject.c/string_hash()
178    int len = buffer.Size();
179    if (len > 0) {
180      const t4_byte *p = buffer.Contents();
181
182      // 20030218: careful to avoid endian-ness sensitivity
183      if (*(const t4_byte*) &endian)
184      // true on big-endian systems
185      switch (h.Property().Type()) {
186        case 'I':
187        case 'L':
188        case 'F':
189        case 'D':
190           {
191            t4_byte *q = buf2.SetBuffer(len);
192            for (int j = 0; j < len; ++j)
193              q[len - j - 1] = p[j];
194            p = q;
195          }
196      }
197
198      long x =  *p << 7;
199
200      // modifications are risky, this code avoid scanning huge blobs
201      if (len > 200)
202        len = 100;
203
204      while (--len >= 0)
205        x = (1000003 *x) ^  *p++;
206
207      if (buffer.Size() > 200) {
208        len = 100;
209        p += buffer.Size() - 200;
210        while (--len >= 0)
211          x = (1000003 *x) ^  *p++;
212      }
213
214      x ^= buffer.Size();
215      hash ^= x ^ i;
216    }
217  }
218
219  if (hash == 0)
220    hash =  - 1;
221
222  return hash;
223}
224
225/*
226 * Types of slots:
227 *  Unused: row = -1, hash = 0
228 *  Dummy:  row = -1, hash = -1
229 *  Active: row >= 0
230 * There must be at least one Unused slot at all times.
231 */
232
233int c4_HashViewer::LookDict(t4_i32 hash_, c4_Cursor cursor_)const {
234  const unsigned int mask = _map.GetSize() - 2;
235  /* We must come up with (i, incr) such that 0 <= i < _size
236  and 0 < incr < _size and both are a function of hash */
237  int i = mask &~hash_;
238  /* We use ~hash_ instead of hash_, as degenerate hash functions, such
239  as for ints <sigh>, can have lots of leading zeros. It's not
240  really a performance risk, but better safe than sorry. */
241  if (IsUnused(i) || Hash(i) == hash_ && KeySame(Row(i), cursor_))
242    return i;
243
244  int freeslot = IsDummy(i) ? i :  - 1;
245
246  /* Derive incr from hash_, just to make it more arbitrary. Note that
247  incr must not be 0, or we will get into an infinite loop.*/
248  unsigned incr = (hash_ ^ ((unsigned long)hash_ >> 3)) &mask;
249  if (!incr)
250    incr = mask;
251
252  int poly = GetPoly();
253  for (;;) {
254    i = (i + incr) &mask;
255    if (IsUnused(i))
256      break;
257    if (Hash(i) == hash_ && KeySame(Row(i), cursor_))
258      return i;
259    if (freeslot ==  - 1 && IsDummy(i))
260      freeslot = i;
261    /* Cycle through GF(2^n)-{0} */
262    incr = incr << 1;
263    if (incr > mask)
264      incr ^= poly;
265     /* This will implicitely clear the highest bit */
266  }
267
268  return freeslot !=  - 1 ? freeslot : i;
269}
270
271void c4_HashViewer::InsertDict(int row_) {
272  c4_Cursor cursor = &_base[row_];
273
274  t4_i32 hash = CalcHash(cursor);
275  int i = LookDict(hash, cursor);
276
277  if (IsDummy(i)) {
278    int n = GetSpare();
279    d4_assert(n > 0);
280    SetSpare(n - 1);
281  }
282
283  SetHash(i, hash);
284  SetRow(i, row_);
285}
286
287void c4_HashViewer::RemoveDict(int pos_) {
288  c4_Cursor key = &_base[pos_];
289  t4_i32 hash = CalcHash(key);
290  int i = LookDict(hash, key);
291  d4_assert(i >= 0);
292
293  d4_assert(Row(i) == pos_);
294
295  SetHash(i,  - 1);
296  SetRow(i,  - 1);
297
298  SetSpare(GetSpare() + 1);
299}
300
301bool c4_HashViewer::DictResize(int minused) {
302  int i, newsize, newpoly;
303  for (i = 0, newsize = 4;; i++, newsize <<= 1) {
304    if (s_polys[i] == 0)
305      return false;
306    else if (newsize > minused) {
307      newpoly = s_polys[i];
308      break;
309    }
310  }
311
312  _map.SetSize(0);
313
314  c4_Row empty;
315  _pRow(empty) =  - 1;
316  _map.InsertAt(0, empty, newsize + 1);
317
318  SetPoly(newpoly);
319  SetSpare(0);
320
321  for (int j = 0; j < _base.GetSize(); ++j)
322    InsertDict(j);
323
324  return true;
325}
326
327c4_View c4_HashViewer::GetTemplate() {
328  return _base.Clone();
329}
330
331int c4_HashViewer::GetSize() {
332  return _base.GetSize();
333}
334
335int c4_HashViewer::Lookup(c4_Cursor key_, int &count_) {
336  // can only use hashing if the properties match the query
337  // XXX it appears that this loop takes some 300 uS!
338  c4_View kv = (*key_).Container();
339  for (int k = 0; k < _numKeys; ++k)
340    if (kv.FindProperty(_base.NthProperty(k).GetId()) < 0)
341      return  - 1;
342
343  t4_i32 hash = CalcHash(key_); // TODO should combine with above loop
344  int i = LookDict(hash, key_);
345
346  int row = Row(i);
347  count_ = row >= 0 && KeySame(row, key_) ? 1 : 0;
348  return count_ ? row : 0; // don't return -1, we *know* it's not there
349}
350
351bool c4_HashViewer::GetItem(int row_, int col_, c4_Bytes &buf_) {
352  return _base.GetItem(row_, col_, buf_);
353}
354
355bool c4_HashViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) {
356  if (col_ < _numKeys) {
357    c4_Bytes temp;
358    _base.GetItem(row_, col_, temp);
359    if (buf_ == temp)
360      return true;
361    // this call will have no effect, just ignore it
362
363    RemoveDict(row_);
364  }
365
366  _base.SetItem(row_, col_, buf_);
367
368  if (col_ < _numKeys) {
369    // careful if changing a key to one which is already present:
370    // in that case, delete the other row to preserve uniqueness
371    //
372    // Note: this is a tricky and confusing issue, because now the
373    // mere act of *setting* a property value can *delete* a row!
374    //
375    // The big problem here is that setting the rest of the values
376    // in a loop can end up *wrong*, if the row has moved down!!!
377    int n;
378    int i = Lookup(&_base[row_], n);
379    if (i >= 0 && n > 0) {
380      RemoveRows(i, 1);
381      if (i < row_)
382        --row_;
383    }
384
385    InsertDict(row_);
386  }
387
388  return true;
389}
390
391bool c4_HashViewer::InsertRows(int pos_, c4_Cursor value_, int count_) {
392  d4_assert(count_ > 0);
393
394  int n;
395  int i = Lookup(value_, n);
396  if (i >= 0 && n > 0) {
397    _base.SetAt(i,  *value_); // replace existing
398    return true;
399  }
400
401  // adjust row numbers if the insertion is not at the end
402  //
403  // TODO this could be optimized to go through the rows which
404  // were moved up, and then adjusting the map through a lookup
405  // (probably better than full scan if pos_ is relatively high)
406  if (pos_ < _base.GetSize()) {
407    for (int r = 0; r < _map.GetSize() - 1; ++r) {
408      int n2 = Row(r);
409      if (n2 >= pos_)
410        SetRow(r, n2 + 1);
411    }
412  }
413
414  _base.InsertAt(pos_,  *value_);
415  InsertDict(pos_);
416
417  int used = _base.GetSize();
418  int fill = used + GetSpare();
419  if (fill *3 >= (_map.GetSize() - 1) *2 && !DictResize(used *2))
420    return false;
421  // bail out
422
423  d4_assert(_base.GetSize() + GetSpare() < _map.GetSize() - 1);
424  return true;
425}
426
427bool c4_HashViewer::RemoveRows(int pos_, int count_) {
428  while (--count_ >= 0) {
429    // since the map persists, be somewhat more aggressive than the
430    // original code in resizing down when the map is getting empty
431    if (_base.GetSize() *3 < _map.GetSize() - 1 && !DictResize(_base.GetSize()))
432      return false;
433    // bail out
434
435    RemoveDict(pos_);
436
437    // move rows down for now
438    //
439    // optionally: consider replacing with last entry (much faster)
440    for (int r = 0; r < _map.GetSize() - 1; ++r) {
441      int n = Row(r);
442      if (n > pos_)
443        SetRow(r, n - 1);
444    }
445
446    _base.RemoveAt(pos_, 1);
447  }
448
449  return true;
450}
451
452/////////////////////////////////////////////////////////////////////////////
453
454class c4_BlockedViewer: public c4_CustomViewer {
455    enum {
456        kLimit = 1000
457    };
458
459    c4_View _base;
460
461    c4_ViewProp _pBlock;
462    c4_DWordArray _offsets;
463
464    int _last_base, _last_limit, _last_slot;
465    c4_View _last_view;
466
467    int Slot(int &pos_);
468    void Split(int block_, int row_);
469    void Merge(int block_);
470    void Validate()const;
471
472    // 2004-04-27 new cache logic, thx MB
473    void SetLast(int row_);
474    void ClearLast(int slot_) {
475        if (_last_slot >= slot_) {
476            _last_limit = _last_slot =  - 1;
477            _last_view = c4_View();
478        }
479    }
480
481  public:
482    c4_BlockedViewer(c4_Sequence &seq_);
483    virtual ~c4_BlockedViewer();
484
485    virtual c4_View GetTemplate();
486    virtual int GetSize();
487    virtual bool GetItem(int row_, int col_, c4_Bytes &buf_);
488    virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_);
489    virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1);
490    virtual bool RemoveRows(int pos_, int count_ = 1);
491};
492
493/////////////////////////////////////////////////////////////////////////////
494
495#if q4_CHECK
496
497// debugging version to verify that the internal data is consistent
498void c4_BlockedViewer::Validate()const {
499  d4_assert(_base.GetSize() >= 2);
500
501  int n = _base.GetSize() - 1;
502  d4_assert(_offsets.GetSize() == n);
503
504  int total = 0;
505  for (int i = 0; i < n; i++) {
506    c4_View bv = _pBlock(_base[i]);
507    d4_assert(bv.GetSize() > 0 || i == 0);
508    total += bv.GetSize();
509    d4_assert((int)_offsets.GetAt(i) == total++);
510  }
511
512  c4_View be = _pBlock(_base[n]);
513  d4_assert(be.GetSize() == n - 1);
514}
515
516#else
517
518// nothing, so inline this thing to avoid even the calling overhead
519d4_inline void c4_BlockedViewer::Validate()const{}
520
521#endif
522
523c4_BlockedViewer::c4_BlockedViewer(c4_Sequence &seq_): _base(&seq_), _pBlock(
524  "_B"), _last_base( - 1), _last_limit( - 1), _last_slot( - 1) {
525  if (_base.GetSize() < 2)
526    _base.SetSize(2);
527
528  int n = _base.GetSize() - 1;
529  _offsets.SetSize(n);
530
531  int total = 0;
532  for (int i = 0; i < n; i++) {
533    c4_View bv = _pBlock(_base[i]);
534    total += bv.GetSize();
535    _offsets.SetAt(i, total++);
536  }
537  Validate();
538}
539
540c4_BlockedViewer::~c4_BlockedViewer(){}
541
542int c4_BlockedViewer::Slot(int &pos_) {
543  const int n = _offsets.GetSize();
544
545  d4_assert(n > 0);
546  d4_assert(pos_ <= (int)_offsets.GetAt(n - 1));
547
548#if 0
549  // not sure the following adds any performance (the logic looks ok)
550  // reason: won't work if inserts/removes always invalidate the cache
551  if (_last_base <= pos_ && pos_ < _last_limit) {
552    pos_ -= _last_base;
553    return _last_slot;
554  }
555#endif
556
557#if 0
558  int h;
559  for (h = 0; h < n; ++h)
560    if (pos_ <= (t4_i32)_offsets.GetAt(h))
561      break;
562#else
563  // switch to binary search, adapted from code by Zhang Dehua, 28-3-2002
564  // slows down some 5%, but said to be much better with 5 million rows
565  int l = 0, h = n - 1;
566  while (l < h) {
567    int m = l + (h - l) / 2;
568    if ((t4_i32)_offsets.GetAt(m) < pos_)
569      l = m + 1;
570    else
571      h = m;
572  }
573#endif
574
575  if (h > 0)
576    pos_ -= _offsets.GetAt(h - 1) + 1;
577
578  return h;
579}
580
581void c4_BlockedViewer::Split(int bno_, int row_) {
582  ClearLast(bno_);
583
584  int z = _offsets.GetSize();
585  d4_assert(bno_ < z);
586  c4_View bz = _pBlock(_base[z]);
587  c4_View bv = _pBlock(_base[bno_]);
588  d4_assert(row_ < bv.GetSize());
589
590  _offsets.InsertAt(bno_, _offsets.GetAt(bno_) - bv.GetSize() + row_);
591
592  _base.InsertAt(bno_ + 1, c4_Row());
593  c4_View bn = _pBlock(_base[bno_ + 1]);
594
595  bv.RelocateRows(row_ + 1,  - 1, bn, 0);
596  bv.RelocateRows(row_, 1, bz, bno_);
597
598  Validate();
599}
600
601void c4_BlockedViewer::Merge(int bno_) {
602  ClearLast(bno_);
603
604  int z = _offsets.GetSize();
605  c4_View bz = _pBlock(_base[z]);
606  c4_View bv1 = _pBlock(_base[bno_]);
607  c4_View bv2 = _pBlock(_base[bno_ + 1]);
608
609  _offsets.RemoveAt(bno_);
610
611  bz.RelocateRows(bno_, 1, bv1,  - 1);
612  bv2.RelocateRows(0,  - 1, bv1,  - 1);
613
614  _base.RemoveAt(bno_ + 1);
615
616  Validate();
617}
618
619c4_View c4_BlockedViewer::GetTemplate() {
620  c4_View bv = _pBlock(_base[0]);
621  return bv.Clone();
622}
623
624int c4_BlockedViewer::GetSize() {
625  int n = _offsets.GetAt(_offsets.GetSize() - 1);
626  d4_assert(n >= 0);
627  return n;
628}
629
630void c4_BlockedViewer::SetLast(int row_) {
631  int orig = row_;
632
633  int i = Slot(row_);
634  d4_assert(0 <= i && i < _offsets.GetSize());
635
636  _last_limit = _offsets.GetAt(i);
637
638  if (_last_limit == orig) {
639    row_ = i;
640    i = _offsets.GetSize();
641    _last_limit = 0; // force miss next time, but view is still cached
642  }
643
644  if (i != _last_slot) {
645    _last_slot = i;
646    _last_view = _pBlock(_base[i]);
647  }
648
649  _last_base = orig - row_;
650}
651
652bool c4_BlockedViewer::GetItem(int row_, int col_, c4_Bytes &buf_) {
653  if (row_ < _last_base || row_ >= _last_limit)
654    SetLast(row_);
655  return _last_view.GetItem(row_ - _last_base, col_, buf_);
656}
657
658bool c4_BlockedViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) {
659  if (row_ < _last_base || row_ >= _last_limit)
660    SetLast(row_);
661  _last_view.SetItem(row_ - _last_base, col_, buf_);
662  return true;
663}
664
665bool c4_BlockedViewer::InsertRows(int pos_, c4_Cursor value_, int count_) {
666  d4_assert(count_ > 0);
667
668  bool atEnd = pos_ == GetSize();
669
670  int z = _offsets.GetSize();
671  int i = Slot(pos_);
672  d4_assert(0 <= i && i < z);
673
674  ClearLast(i);
675
676  c4_View bv = _pBlock(_base[i]);
677  d4_assert(0 <= pos_ && pos_ <= bv.GetSize());
678
679  bv.InsertAt(pos_,  *value_, count_);
680  for (int j = i; j < z; ++j)
681    _offsets.SetAt(j, _offsets.GetAt(j) + count_);
682
683  // massive insertions are first split off
684  while (bv.GetSize() >= 2 *kLimit)
685    Split(i, bv.GetSize() - kLimit - 2);
686
687  if (bv.GetSize() > kLimit)
688    Split(i, atEnd ? kLimit - 1: bv.GetSize() / 2);
689  // 23-3-2002, from MB
690
691  Validate();
692
693  return true;
694}
695
696bool c4_BlockedViewer::RemoveRows(int pos_, int count_) {
697  d4_assert(count_ > 0);
698  d4_assert(pos_ + count_ <= GetSize());
699
700  int z = _offsets.GetSize();
701  int i = Slot(pos_);
702  d4_assert(0 <= i && i < z);
703
704  ClearLast(i);
705
706  c4_View bv = _pBlock(_base[i]);
707  d4_assert(0 <= pos_ && pos_ <= bv.GetSize());
708
709  int todo = count_;
710
711  // optimize if the deletion goes past the end of this block...
712  int overshoot = pos_ + count_ - bv.GetSize();
713  if (overshoot > 0) {
714
715    // first, delete blocks which are going away completely
716    while (i + 1 < _offsets.GetSize()) {
717      int nextsize = _offsets.GetAt(i + 1) - _offsets.GetAt(i);
718      if (overshoot < nextsize)
719        break;
720      todo -= nextsize;
721      overshoot -= nextsize;
722
723      // drop the block and forget it ever existed
724      for (int j = i + 1; j < z; ++j)
725        _offsets.SetAt(j, _offsets.GetAt(j) - nextsize);
726      _offsets.RemoveAt(i + 1);
727
728      _base.RemoveAt(i + 1);
729      --z;
730      c4_View bz = _pBlock(_base[z]);
731      bz.RemoveAt(i);
732
733      Validate();
734    }
735
736    // delete before merging, to avoid useless copying
737    if (overshoot > 1) {
738      c4_View bv2 = _pBlock(_base[i + 1]);
739      bv2.RemoveAt(0, overshoot - 1);
740      todo -= overshoot - 1;
741
742      for (int j = i + 1; j < z; ++j)
743        _offsets.SetAt(j, _offsets.GetAt(j) - (overshoot - 1));
744
745      // if the next block is filled enough, rotate the separator
746      // this avoids an expensive and unnecessary merge + split
747      if (bv2.GetSize() > kLimit / 2) {
748        c4_View bz = _pBlock(_base[z]);
749        bz[i] = bv2[0];
750        bv2.RemoveAt(0);
751        --todo;
752        d4_assert(pos_ + todo <= bv.GetSize());
753        d4_assert(i < _offsets.GetSize());
754
755        for (int j = i + 1; j < z; ++j)
756          _offsets.SetAt(j, _offsets.GetAt(j) - 1);
757      }
758    }
759
760    // merge into one block
761    if (pos_ + todo > bv.GetSize()) {
762      d4_assert(i < z - 1);
763      Merge(i);
764      --z;
765    }
766  }
767  d4_assert(pos_ + todo <= bv.GetSize());
768
769  // now remove the rows and adjust offsets
770  if (todo > 0)
771    bv.RemoveAt(pos_, todo);
772
773  for (int j = i; j < z; ++j)
774    _offsets.SetAt(j, _offsets.GetAt(j) - todo);
775
776  // if the block underflows, merge it
777  if (bv.GetSize() < kLimit / 2) {
778    if (i > 0)
779    // merge with predecessor, preferably
780      bv = _pBlock(_base[--i]);
781    if (i >= z - 1)
782    // unless there is no successor to merge with
783      return true;
784    Merge(i);
785  }
786
787  // if the block overflows, split it
788  if (bv.GetSize() > kLimit)
789    Split(i, bv.GetSize() / 2);
790
791  Validate();
792
793  return true;
794}
795
796/////////////////////////////////////////////////////////////////////////////
797
798class c4_OrderedViewer: public c4_CustomViewer {
799    c4_View _base;
800    int _numKeys;
801
802    int KeyCompare(int row_, c4_Cursor cursor_)const;
803
804  public:
805    c4_OrderedViewer(c4_Sequence &seq_, int numKeys_);
806    virtual ~c4_OrderedViewer();
807
808    virtual c4_View GetTemplate();
809    virtual int GetSize();
810    virtual int Lookup(c4_Cursor key_, int &count_);
811    virtual bool GetItem(int row_, int col_, c4_Bytes &buf_);
812    virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_);
813    virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1);
814    virtual bool RemoveRows(int pos_, int count_ = 1);
815};
816
817/////////////////////////////////////////////////////////////////////////////
818
819c4_OrderedViewer::c4_OrderedViewer(c4_Sequence &seq_, int numKeys_): _base
820  (&seq_), _numKeys(numKeys_){}
821
822c4_OrderedViewer::~c4_OrderedViewer(){}
823
824int c4_OrderedViewer::KeyCompare(int row_, c4_Cursor cursor_)const {
825  for (int i = 0; i < _numKeys; ++i) {
826    c4_Bytes buffer;
827    _base.GetItem(row_, i, buffer);
828
829    c4_Handler &h = cursor_._seq->NthHandler(i);
830    int f = h.Compare(cursor_._index, buffer);
831    if (f != 0)
832      return f;
833  }
834
835  return 0;
836}
837
838c4_View c4_OrderedViewer::GetTemplate() {
839  return _base.Clone();
840}
841
842int c4_OrderedViewer::GetSize() {
843  return _base.GetSize();
844}
845
846int c4_OrderedViewer::Lookup(c4_Cursor key_, int &count_) {
847  // can only use bsearch if the properties match the query
848  // XXX with ord1.tcl (dict words), this loop takes 300 uS!
849  c4_View kv = (*key_).Container();
850  for (int k = 0; k < _numKeys; ++k)
851    if (kv.FindProperty(_base.NthProperty(k).GetId()) < 0)
852      return  - 1;
853
854#if 0 // Locate gets the count wrong, it seems 2000-06-15
855  int pos;
856  count_ = _base.Locate(*key_, &pos);
857#else
858  int pos = _base.Search(*key_);
859  count_ = pos < _base.GetSize() && KeyCompare(pos, key_) == 0 ? 1 : 0;
860#endif
861  return pos;
862}
863
864bool c4_OrderedViewer::GetItem(int row_, int col_, c4_Bytes &buf_) {
865  return _base.GetItem(row_, col_, buf_);
866}
867
868bool c4_OrderedViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) {
869  if (col_ < _numKeys) {
870    c4_Bytes temp;
871    _base.GetItem(row_, col_, temp);
872    if (buf_ == temp)
873      return true;
874    // this call will have no effect, just ignore it
875  }
876
877  _base.SetItem(row_, col_, buf_);
878
879  if (col_ < _numKeys) {
880    c4_Row copy = _base[row_];
881    // have to remove the row because it messes up searching
882    // it would be more efficient to search *around* this row
883    // or perhaps figure out new pos before changing any data
884    RemoveRows(row_);
885    InsertRows(0, &copy); // position is ignored
886  }
887
888  return true;
889}
890
891bool c4_OrderedViewer::InsertRows(int, c4_Cursor value_, int count_) {
892  d4_assert(count_ > 0);
893
894  int n;
895  int i = Lookup(value_, n);
896
897  // XXX if the lookup does not work, then insert as first element (!?)
898  d4_assert(i >= 0);
899  if (i < 0)
900    i = 0;
901
902  if (n == 0)
903    _base.InsertAt(i,  *value_);
904  else {
905    d4_assert(i < _base.GetSize());
906    _base.SetAt(i,  *value_); // replace existing
907  }
908
909  return true;
910}
911
912bool c4_OrderedViewer::RemoveRows(int pos_, int count_) {
913  _base.RemoveAt(pos_, count_);
914  return true;
915}
916
917/////////////////////////////////////////////////////////////////////////////
918
919class c4_IndexedViewer: public c4_CustomViewer {
920    c4_View _base;
921    c4_View _map;
922    c4_View _props;
923    bool _unique;
924    c4_IntProp _mapProp;
925
926    int KeyCompare(int row_, c4_Cursor cursor_)const;
927
928  public:
929    c4_IndexedViewer(c4_Sequence &seq_, c4_Sequence &map_, const c4_View
930      &props_, bool unique_);
931    virtual ~c4_IndexedViewer();
932
933    virtual c4_View GetTemplate();
934    virtual int GetSize();
935    virtual int Lookup(c4_Cursor key_, int &count_);
936    virtual bool GetItem(int row_, int col_, c4_Bytes &buf_);
937    virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_);
938    virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1);
939    virtual bool RemoveRows(int pos_, int count_ = 1);
940};
941
942/////////////////////////////////////////////////////////////////////////////
943
944c4_IndexedViewer::c4_IndexedViewer(c4_Sequence &seq_, c4_Sequence &map_, const
945  c4_View &props_, bool unique_): _base(&seq_), _map(&map_), _props(props_),
946  _unique(unique_), _mapProp((const c4_IntProp &)_map.NthProperty(0)) {
947  int n = _base.GetSize();
948  if (_map.GetSize() != n) {
949    c4_View sorted = _base.SortOn(_props);
950
951    _map.SetSize(n);
952    for (int i = 0; i < n; ++i)
953      _mapProp(_map[i]) = _base.GetIndexOf(sorted[i]);
954  }
955}
956
957c4_IndexedViewer::~c4_IndexedViewer(){}
958
959int c4_IndexedViewer::KeyCompare(int row_, c4_Cursor cursor_)const {
960  int n = _props.NumProperties();
961  for (int i = 0; i < n; ++i) {
962    c4_Bytes buffer;
963    _base.GetItem(row_, i, buffer);
964
965    c4_Handler &h = cursor_._seq->NthHandler(i);
966    int f = h.Compare(cursor_._index, buffer);
967    if (f != 0)
968      return f;
969  }
970
971  return 0;
972}
973
974c4_View c4_IndexedViewer::GetTemplate() {
975  return _base.Clone();
976}
977
978int c4_IndexedViewer::GetSize() {
979  return _base.GetSize();
980}
981
982int c4_IndexedViewer::Lookup(c4_Cursor key_, int &count_) {
983  // can only use bsearch if the properties match the query
984  // XXX with ord1.tcl (dict words), this loop takes 300 uS!
985  c4_View kv = (*key_).Container();
986  int n = _props.NumProperties();
987  for (int k = 0; k < n; ++k)
988    if (kv.FindProperty(_props.NthProperty(k).GetId()) < 0)
989      return  - 1;
990
991#if 0 // Locate gets the count wrong, it seems 2000-06-15
992  int pos;
993  count_ = _base.Locate(*key_, &pos);
994#else
995  int pos = _base.Search(*key_);
996  count_ = pos < _base.GetSize() && KeyCompare(pos, key_) == 0 ? 1 : 0;
997#endif
998  return pos;
999}
1000
1001bool c4_IndexedViewer::GetItem(int row_, int col_, c4_Bytes &buf_) {
1002  return _base.GetItem(row_, col_, buf_);
1003}
1004
1005bool c4_IndexedViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) {
1006  const int id = _base.NthProperty(col_).GetId();
1007  const bool keyMod = _props.FindProperty(id) >= 0;
1008
1009  if (keyMod) {
1010    c4_Bytes temp;
1011    _base.GetItem(row_, col_, temp);
1012    if (buf_ == temp)
1013      return true;
1014    // this call will have no effect, just ignore it
1015  }
1016
1017  _base.SetItem(row_, col_, buf_);
1018
1019  if (keyMod) {
1020    // TODO adjust index
1021  }
1022
1023  return true;
1024}
1025
1026bool c4_IndexedViewer::InsertRows(int, c4_Cursor value_, int count_) {
1027  d4_assert(count_ > 0);
1028
1029  if (_unique)
1030    count_ = 1;
1031
1032  int n;
1033  int i = Lookup(value_, n);
1034
1035  // XXX if the lookup does not work, then insert as first element (!?)
1036  d4_assert(i >= 0);
1037  if (i < 0)
1038    i = 0;
1039
1040  if (n == 0)
1041    _base.InsertAt(i,  *value_);
1042  else {
1043    d4_assert(i < _base.GetSize());
1044    _base.SetAt(i,  *value_); // replace existing
1045  }
1046
1047  return true;
1048}
1049
1050bool c4_IndexedViewer::RemoveRows(int pos_, int count_) {
1051  _base.RemoveAt(pos_, count_);
1052
1053  int n = _map.GetSize();
1054  while (--n >= 0) {
1055    int v = _mapProp(_map[n]);
1056    if (v >= pos_)
1057      if (v < pos_ + count_)
1058        _map.RemoveAt(n);
1059      else
1060        _mapProp(_map[n]) = v - count_;
1061  }
1062
1063  return true;
1064}
1065
1066/////////////////////////////////////////////////////////////////////////////
1067
1068c4_CustomViewer *f4_CreateReadOnly(c4_Sequence &seq_) {
1069  return d4_new c4_ReadOnlyViewer(seq_);
1070}
1071
1072c4_CustomViewer *f4_CreateHash(c4_Sequence &seq_, int nk_, c4_Sequence *map_) {
1073  return d4_new c4_HashViewer(seq_, nk_, map_);
1074}
1075
1076c4_CustomViewer *f4_CreateBlocked(c4_Sequence &seq_) {
1077  return d4_new c4_BlockedViewer(seq_);
1078}
1079
1080c4_CustomViewer *f4_CreateOrdered(c4_Sequence &seq_, int nk_) {
1081  return d4_new c4_OrderedViewer(seq_, nk_);
1082}
1083
1084c4_CustomViewer *f4_CreateIndexed(c4_Sequence &seq_, c4_Sequence &map_, const
1085  c4_View &props_, bool unique_) {
1086  return d4_new c4_IndexedViewer(seq_, map_, props_, unique_);
1087}
1088
1089/////////////////////////////////////////////////////////////////////////////
1090