1// remap.cpp -- 2// $Id: remap.cpp 1230 2007-03-09 15:58:53Z jcw $ 3// This is part of Metakit, the homepage is http://www.equi4.com/metakit.html 4 5/** @file 6 * Mapping and remapping custom viewers 7 */ 8 9#include "header.h" 10#include "remap.h" 11#include "handler.h" 12 13///////////////////////////////////////////////////////////////////////////// 14 15class c4_ReadOnlyViewer: public c4_CustomViewer { 16 c4_View _base; 17 18 public: 19 c4_ReadOnlyViewer(c4_Sequence &seq_): _base(&seq_){} 20 virtual ~c4_ReadOnlyViewer(){} 21 22 virtual c4_View GetTemplate() { 23 return _base.Clone(); 24 } 25 virtual int GetSize() { 26 return _base.GetSize(); 27 } 28 29 virtual int Lookup(c4_Cursor key_, int &count_) { 30 int pos = 0; 31 count_ = _base.GetSize(); 32 return _base.RestrictSearch(*key_, pos, count_); 33 } 34 35 virtual bool GetItem(int row_, int col_, c4_Bytes &buf_) { 36 return _base.GetItem(row_, col_, buf_); 37 } 38}; 39 40///////////////////////////////////////////////////////////////////////////// 41 42class c4_HashViewer: public c4_CustomViewer { 43 c4_View _base; 44 c4_View _map; 45 int _numKeys; 46 47 c4_IntProp _pHash; 48 c4_IntProp _pRow; 49 50 bool KeySame(int row_, c4_Cursor cursor_)const; 51 t4_i32 CalcHash(c4_Cursor cursor_)const; 52 int LookDict(t4_i32 hash_, c4_Cursor cursor_)const; 53 void InsertDict(int row_); 54 void RemoveDict(int pos_); 55 bool DictResize(int minused); 56 57 int Row(int i_)const { 58 return _pRow(_map[i_]); 59 } 60 int Hash(int i_)const { 61 return _pHash(_map[i_]); 62 } 63 64 void SetRow(int i_, int v_) { 65 _pRow(_map[i_]) = v_; 66 } 67 void SetHash(int i_, int v_) { 68 _pHash(_map[i_]) = v_; 69 } 70 71 bool IsUnused(int)const; 72 bool IsDummy(int)const; 73 bool IsActive(int i_)const { 74 return Row(i_) >= 0; 75 } 76 77 int GetPoly()const; 78 void SetPoly(int v_); 79 int GetSpare()const; 80 void SetSpare(int v_); 81 82 public: 83 c4_HashViewer(c4_Sequence &seq_, int numKeys_, c4_Sequence *map_ = 0); 84 virtual ~c4_HashViewer(); 85 86 virtual c4_View GetTemplate(); 87 virtual int GetSize(); 88 virtual int Lookup(c4_Cursor key_, int &count_); 89 virtual bool GetItem(int row_, int col_, c4_Bytes &buf_); 90 virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_); 91 virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1); 92 virtual bool RemoveRows(int pos_, int count_ = 1); 93}; 94 95///////////////////////////////////////////////////////////////////////////// 96// The following contains code derived froms Python's dictionaries, hence: 97// Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam, 98// The Netherlands. 99// Reduced and turned into a fast C++ class by Christian Tismer, hence: 100// Copyright 1999 by Christian Tismer. 101// Vectorized and reorganized further by Jean-Claude Wippler. 102///////////////////////////////////////////////////////////////////////////// 103 104// Table of irreducible polynomials to efficiently cycle through 105// GF(2^n)-{0}, 2<=n<=30. 106 107static long s_polys[] = { 108 4+3, 8+3, 16+3, 32+5, 64+3, 128+3, 256+29, 512+17, 1024+9, 2048+5, 4096+83, 109 8192+27, 16384+43, 32768+3, 65536+45, 131072+9, 262144+39, 524288+39, 110 1048576+9, 2097152+5, 4194304+3, 8388608+33, 16777216+27, 33554432+9, 111 67108864+71, 134217728+39, 268435456+9, 536870912+5, 1073741824+83, 0 112}; 113 114///////////////////////////////////////////////////////////////////////////// 115 116c4_HashViewer::c4_HashViewer(c4_Sequence &seq_, int numKeys_, c4_Sequence *map_) 117 : _base(&seq_), _map(map_), _numKeys(numKeys_), _pHash("_H"), _pRow("_R") { 118 if (_map.GetSize() == 0) 119 _map.SetSize(1); 120 121 int poly = GetPoly(); 122 if (poly == 0 || _map.GetSize() <= _base.GetSize()) 123 DictResize(_base.GetSize()); 124} 125 126c4_HashViewer::~c4_HashViewer(){} 127 128bool c4_HashViewer::IsUnused(int row_)const { 129 c4_RowRef r = _map[row_]; 130 return _pRow(r) < 0 && _pHash(r) == 0; 131} 132 133bool c4_HashViewer::IsDummy(int row_)const { 134 c4_RowRef r = _map[row_]; 135 return _pRow(r) < 0 && _pHash(r) < 0; 136} 137 138int c4_HashViewer::GetPoly()const { 139 return Hash(_map.GetSize() - 1); 140} 141 142void c4_HashViewer::SetPoly(int v_) { 143 SetHash(_map.GetSize() - 1, v_); 144} 145 146int c4_HashViewer::GetSpare()const { 147 return Row(_map.GetSize() - 1); 148} 149 150void c4_HashViewer::SetSpare(int v_) { 151 SetRow(_map.GetSize() - 1, v_); 152} 153 154bool c4_HashViewer::KeySame(int row_, c4_Cursor cursor_)const { 155 for (int i = 0; i < _numKeys; ++i) { 156 c4_Bytes buffer; 157 _base.GetItem(row_, i, buffer); 158 159 c4_Handler &h = cursor_._seq->NthHandler(i); 160 if (h.Compare(cursor_._index, buffer) != 0) 161 return false; 162 } 163 164 return true; 165} 166 167/// Create mapped view which is uses a second view for hashing 168t4_i32 c4_HashViewer::CalcHash(c4_Cursor cursor_)const { 169 c4_Bytes buffer, buf2; 170 const t4_i32 endian = 0x03020100; 171 t4_i32 hash = 0; 172 173 for (int i = 0; i < _numKeys; ++i) { 174 c4_Handler &h = cursor_._seq->NthHandler(i); 175 cursor_._seq->Get(cursor_._index, h.PropId(), buffer); 176 177 // this code borrows from Python's stringobject.c/string_hash() 178 int len = buffer.Size(); 179 if (len > 0) { 180 const t4_byte *p = buffer.Contents(); 181 182 // 20030218: careful to avoid endian-ness sensitivity 183 if (*(const t4_byte*) &endian) 184 // true on big-endian systems 185 switch (h.Property().Type()) { 186 case 'I': 187 case 'L': 188 case 'F': 189 case 'D': 190 { 191 t4_byte *q = buf2.SetBuffer(len); 192 for (int j = 0; j < len; ++j) 193 q[len - j - 1] = p[j]; 194 p = q; 195 } 196 } 197 198 long x = *p << 7; 199 200 // modifications are risky, this code avoid scanning huge blobs 201 if (len > 200) 202 len = 100; 203 204 while (--len >= 0) 205 x = (1000003 *x) ^ *p++; 206 207 if (buffer.Size() > 200) { 208 len = 100; 209 p += buffer.Size() - 200; 210 while (--len >= 0) 211 x = (1000003 *x) ^ *p++; 212 } 213 214 x ^= buffer.Size(); 215 hash ^= x ^ i; 216 } 217 } 218 219 if (hash == 0) 220 hash = - 1; 221 222 return hash; 223} 224 225/* 226 * Types of slots: 227 * Unused: row = -1, hash = 0 228 * Dummy: row = -1, hash = -1 229 * Active: row >= 0 230 * There must be at least one Unused slot at all times. 231 */ 232 233int c4_HashViewer::LookDict(t4_i32 hash_, c4_Cursor cursor_)const { 234 const unsigned int mask = _map.GetSize() - 2; 235 /* We must come up with (i, incr) such that 0 <= i < _size 236 and 0 < incr < _size and both are a function of hash */ 237 int i = mask &~hash_; 238 /* We use ~hash_ instead of hash_, as degenerate hash functions, such 239 as for ints <sigh>, can have lots of leading zeros. It's not 240 really a performance risk, but better safe than sorry. */ 241 if (IsUnused(i) || Hash(i) == hash_ && KeySame(Row(i), cursor_)) 242 return i; 243 244 int freeslot = IsDummy(i) ? i : - 1; 245 246 /* Derive incr from hash_, just to make it more arbitrary. Note that 247 incr must not be 0, or we will get into an infinite loop.*/ 248 unsigned incr = (hash_ ^ ((unsigned long)hash_ >> 3)) &mask; 249 if (!incr) 250 incr = mask; 251 252 int poly = GetPoly(); 253 for (;;) { 254 i = (i + incr) &mask; 255 if (IsUnused(i)) 256 break; 257 if (Hash(i) == hash_ && KeySame(Row(i), cursor_)) 258 return i; 259 if (freeslot == - 1 && IsDummy(i)) 260 freeslot = i; 261 /* Cycle through GF(2^n)-{0} */ 262 incr = incr << 1; 263 if (incr > mask) 264 incr ^= poly; 265 /* This will implicitely clear the highest bit */ 266 } 267 268 return freeslot != - 1 ? freeslot : i; 269} 270 271void c4_HashViewer::InsertDict(int row_) { 272 c4_Cursor cursor = &_base[row_]; 273 274 t4_i32 hash = CalcHash(cursor); 275 int i = LookDict(hash, cursor); 276 277 if (IsDummy(i)) { 278 int n = GetSpare(); 279 d4_assert(n > 0); 280 SetSpare(n - 1); 281 } 282 283 SetHash(i, hash); 284 SetRow(i, row_); 285} 286 287void c4_HashViewer::RemoveDict(int pos_) { 288 c4_Cursor key = &_base[pos_]; 289 t4_i32 hash = CalcHash(key); 290 int i = LookDict(hash, key); 291 d4_assert(i >= 0); 292 293 d4_assert(Row(i) == pos_); 294 295 SetHash(i, - 1); 296 SetRow(i, - 1); 297 298 SetSpare(GetSpare() + 1); 299} 300 301bool c4_HashViewer::DictResize(int minused) { 302 int i, newsize, newpoly; 303 for (i = 0, newsize = 4;; i++, newsize <<= 1) { 304 if (s_polys[i] == 0) 305 return false; 306 else if (newsize > minused) { 307 newpoly = s_polys[i]; 308 break; 309 } 310 } 311 312 _map.SetSize(0); 313 314 c4_Row empty; 315 _pRow(empty) = - 1; 316 _map.InsertAt(0, empty, newsize + 1); 317 318 SetPoly(newpoly); 319 SetSpare(0); 320 321 for (int j = 0; j < _base.GetSize(); ++j) 322 InsertDict(j); 323 324 return true; 325} 326 327c4_View c4_HashViewer::GetTemplate() { 328 return _base.Clone(); 329} 330 331int c4_HashViewer::GetSize() { 332 return _base.GetSize(); 333} 334 335int c4_HashViewer::Lookup(c4_Cursor key_, int &count_) { 336 // can only use hashing if the properties match the query 337 // XXX it appears that this loop takes some 300 uS! 338 c4_View kv = (*key_).Container(); 339 for (int k = 0; k < _numKeys; ++k) 340 if (kv.FindProperty(_base.NthProperty(k).GetId()) < 0) 341 return - 1; 342 343 t4_i32 hash = CalcHash(key_); // TODO should combine with above loop 344 int i = LookDict(hash, key_); 345 346 int row = Row(i); 347 count_ = row >= 0 && KeySame(row, key_) ? 1 : 0; 348 return count_ ? row : 0; // don't return -1, we *know* it's not there 349} 350 351bool c4_HashViewer::GetItem(int row_, int col_, c4_Bytes &buf_) { 352 return _base.GetItem(row_, col_, buf_); 353} 354 355bool c4_HashViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) { 356 if (col_ < _numKeys) { 357 c4_Bytes temp; 358 _base.GetItem(row_, col_, temp); 359 if (buf_ == temp) 360 return true; 361 // this call will have no effect, just ignore it 362 363 RemoveDict(row_); 364 } 365 366 _base.SetItem(row_, col_, buf_); 367 368 if (col_ < _numKeys) { 369 // careful if changing a key to one which is already present: 370 // in that case, delete the other row to preserve uniqueness 371 // 372 // Note: this is a tricky and confusing issue, because now the 373 // mere act of *setting* a property value can *delete* a row! 374 // 375 // The big problem here is that setting the rest of the values 376 // in a loop can end up *wrong*, if the row has moved down!!! 377 int n; 378 int i = Lookup(&_base[row_], n); 379 if (i >= 0 && n > 0) { 380 RemoveRows(i, 1); 381 if (i < row_) 382 --row_; 383 } 384 385 InsertDict(row_); 386 } 387 388 return true; 389} 390 391bool c4_HashViewer::InsertRows(int pos_, c4_Cursor value_, int count_) { 392 d4_assert(count_ > 0); 393 394 int n; 395 int i = Lookup(value_, n); 396 if (i >= 0 && n > 0) { 397 _base.SetAt(i, *value_); // replace existing 398 return true; 399 } 400 401 // adjust row numbers if the insertion is not at the end 402 // 403 // TODO this could be optimized to go through the rows which 404 // were moved up, and then adjusting the map through a lookup 405 // (probably better than full scan if pos_ is relatively high) 406 if (pos_ < _base.GetSize()) { 407 for (int r = 0; r < _map.GetSize() - 1; ++r) { 408 int n2 = Row(r); 409 if (n2 >= pos_) 410 SetRow(r, n2 + 1); 411 } 412 } 413 414 _base.InsertAt(pos_, *value_); 415 InsertDict(pos_); 416 417 int used = _base.GetSize(); 418 int fill = used + GetSpare(); 419 if (fill *3 >= (_map.GetSize() - 1) *2 && !DictResize(used *2)) 420 return false; 421 // bail out 422 423 d4_assert(_base.GetSize() + GetSpare() < _map.GetSize() - 1); 424 return true; 425} 426 427bool c4_HashViewer::RemoveRows(int pos_, int count_) { 428 while (--count_ >= 0) { 429 // since the map persists, be somewhat more aggressive than the 430 // original code in resizing down when the map is getting empty 431 if (_base.GetSize() *3 < _map.GetSize() - 1 && !DictResize(_base.GetSize())) 432 return false; 433 // bail out 434 435 RemoveDict(pos_); 436 437 // move rows down for now 438 // 439 // optionally: consider replacing with last entry (much faster) 440 for (int r = 0; r < _map.GetSize() - 1; ++r) { 441 int n = Row(r); 442 if (n > pos_) 443 SetRow(r, n - 1); 444 } 445 446 _base.RemoveAt(pos_, 1); 447 } 448 449 return true; 450} 451 452///////////////////////////////////////////////////////////////////////////// 453 454class c4_BlockedViewer: public c4_CustomViewer { 455 enum { 456 kLimit = 1000 457 }; 458 459 c4_View _base; 460 461 c4_ViewProp _pBlock; 462 c4_DWordArray _offsets; 463 464 int _last_base, _last_limit, _last_slot; 465 c4_View _last_view; 466 467 int Slot(int &pos_); 468 void Split(int block_, int row_); 469 void Merge(int block_); 470 void Validate()const; 471 472 // 2004-04-27 new cache logic, thx MB 473 void SetLast(int row_); 474 void ClearLast(int slot_) { 475 if (_last_slot >= slot_) { 476 _last_limit = _last_slot = - 1; 477 _last_view = c4_View(); 478 } 479 } 480 481 public: 482 c4_BlockedViewer(c4_Sequence &seq_); 483 virtual ~c4_BlockedViewer(); 484 485 virtual c4_View GetTemplate(); 486 virtual int GetSize(); 487 virtual bool GetItem(int row_, int col_, c4_Bytes &buf_); 488 virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_); 489 virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1); 490 virtual bool RemoveRows(int pos_, int count_ = 1); 491}; 492 493///////////////////////////////////////////////////////////////////////////// 494 495#if q4_CHECK 496 497// debugging version to verify that the internal data is consistent 498void c4_BlockedViewer::Validate()const { 499 d4_assert(_base.GetSize() >= 2); 500 501 int n = _base.GetSize() - 1; 502 d4_assert(_offsets.GetSize() == n); 503 504 int total = 0; 505 for (int i = 0; i < n; i++) { 506 c4_View bv = _pBlock(_base[i]); 507 d4_assert(bv.GetSize() > 0 || i == 0); 508 total += bv.GetSize(); 509 d4_assert((int)_offsets.GetAt(i) == total++); 510 } 511 512 c4_View be = _pBlock(_base[n]); 513 d4_assert(be.GetSize() == n - 1); 514} 515 516#else 517 518// nothing, so inline this thing to avoid even the calling overhead 519d4_inline void c4_BlockedViewer::Validate()const{} 520 521#endif 522 523c4_BlockedViewer::c4_BlockedViewer(c4_Sequence &seq_): _base(&seq_), _pBlock( 524 "_B"), _last_base( - 1), _last_limit( - 1), _last_slot( - 1) { 525 if (_base.GetSize() < 2) 526 _base.SetSize(2); 527 528 int n = _base.GetSize() - 1; 529 _offsets.SetSize(n); 530 531 int total = 0; 532 for (int i = 0; i < n; i++) { 533 c4_View bv = _pBlock(_base[i]); 534 total += bv.GetSize(); 535 _offsets.SetAt(i, total++); 536 } 537 Validate(); 538} 539 540c4_BlockedViewer::~c4_BlockedViewer(){} 541 542int c4_BlockedViewer::Slot(int &pos_) { 543 const int n = _offsets.GetSize(); 544 545 d4_assert(n > 0); 546 d4_assert(pos_ <= (int)_offsets.GetAt(n - 1)); 547 548#if 0 549 // not sure the following adds any performance (the logic looks ok) 550 // reason: won't work if inserts/removes always invalidate the cache 551 if (_last_base <= pos_ && pos_ < _last_limit) { 552 pos_ -= _last_base; 553 return _last_slot; 554 } 555#endif 556 557#if 0 558 int h; 559 for (h = 0; h < n; ++h) 560 if (pos_ <= (t4_i32)_offsets.GetAt(h)) 561 break; 562#else 563 // switch to binary search, adapted from code by Zhang Dehua, 28-3-2002 564 // slows down some 5%, but said to be much better with 5 million rows 565 int l = 0, h = n - 1; 566 while (l < h) { 567 int m = l + (h - l) / 2; 568 if ((t4_i32)_offsets.GetAt(m) < pos_) 569 l = m + 1; 570 else 571 h = m; 572 } 573#endif 574 575 if (h > 0) 576 pos_ -= _offsets.GetAt(h - 1) + 1; 577 578 return h; 579} 580 581void c4_BlockedViewer::Split(int bno_, int row_) { 582 ClearLast(bno_); 583 584 int z = _offsets.GetSize(); 585 d4_assert(bno_ < z); 586 c4_View bz = _pBlock(_base[z]); 587 c4_View bv = _pBlock(_base[bno_]); 588 d4_assert(row_ < bv.GetSize()); 589 590 _offsets.InsertAt(bno_, _offsets.GetAt(bno_) - bv.GetSize() + row_); 591 592 _base.InsertAt(bno_ + 1, c4_Row()); 593 c4_View bn = _pBlock(_base[bno_ + 1]); 594 595 bv.RelocateRows(row_ + 1, - 1, bn, 0); 596 bv.RelocateRows(row_, 1, bz, bno_); 597 598 Validate(); 599} 600 601void c4_BlockedViewer::Merge(int bno_) { 602 ClearLast(bno_); 603 604 int z = _offsets.GetSize(); 605 c4_View bz = _pBlock(_base[z]); 606 c4_View bv1 = _pBlock(_base[bno_]); 607 c4_View bv2 = _pBlock(_base[bno_ + 1]); 608 609 _offsets.RemoveAt(bno_); 610 611 bz.RelocateRows(bno_, 1, bv1, - 1); 612 bv2.RelocateRows(0, - 1, bv1, - 1); 613 614 _base.RemoveAt(bno_ + 1); 615 616 Validate(); 617} 618 619c4_View c4_BlockedViewer::GetTemplate() { 620 c4_View bv = _pBlock(_base[0]); 621 return bv.Clone(); 622} 623 624int c4_BlockedViewer::GetSize() { 625 int n = _offsets.GetAt(_offsets.GetSize() - 1); 626 d4_assert(n >= 0); 627 return n; 628} 629 630void c4_BlockedViewer::SetLast(int row_) { 631 int orig = row_; 632 633 int i = Slot(row_); 634 d4_assert(0 <= i && i < _offsets.GetSize()); 635 636 _last_limit = _offsets.GetAt(i); 637 638 if (_last_limit == orig) { 639 row_ = i; 640 i = _offsets.GetSize(); 641 _last_limit = 0; // force miss next time, but view is still cached 642 } 643 644 if (i != _last_slot) { 645 _last_slot = i; 646 _last_view = _pBlock(_base[i]); 647 } 648 649 _last_base = orig - row_; 650} 651 652bool c4_BlockedViewer::GetItem(int row_, int col_, c4_Bytes &buf_) { 653 if (row_ < _last_base || row_ >= _last_limit) 654 SetLast(row_); 655 return _last_view.GetItem(row_ - _last_base, col_, buf_); 656} 657 658bool c4_BlockedViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) { 659 if (row_ < _last_base || row_ >= _last_limit) 660 SetLast(row_); 661 _last_view.SetItem(row_ - _last_base, col_, buf_); 662 return true; 663} 664 665bool c4_BlockedViewer::InsertRows(int pos_, c4_Cursor value_, int count_) { 666 d4_assert(count_ > 0); 667 668 bool atEnd = pos_ == GetSize(); 669 670 int z = _offsets.GetSize(); 671 int i = Slot(pos_); 672 d4_assert(0 <= i && i < z); 673 674 ClearLast(i); 675 676 c4_View bv = _pBlock(_base[i]); 677 d4_assert(0 <= pos_ && pos_ <= bv.GetSize()); 678 679 bv.InsertAt(pos_, *value_, count_); 680 for (int j = i; j < z; ++j) 681 _offsets.SetAt(j, _offsets.GetAt(j) + count_); 682 683 // massive insertions are first split off 684 while (bv.GetSize() >= 2 *kLimit) 685 Split(i, bv.GetSize() - kLimit - 2); 686 687 if (bv.GetSize() > kLimit) 688 Split(i, atEnd ? kLimit - 1: bv.GetSize() / 2); 689 // 23-3-2002, from MB 690 691 Validate(); 692 693 return true; 694} 695 696bool c4_BlockedViewer::RemoveRows(int pos_, int count_) { 697 d4_assert(count_ > 0); 698 d4_assert(pos_ + count_ <= GetSize()); 699 700 int z = _offsets.GetSize(); 701 int i = Slot(pos_); 702 d4_assert(0 <= i && i < z); 703 704 ClearLast(i); 705 706 c4_View bv = _pBlock(_base[i]); 707 d4_assert(0 <= pos_ && pos_ <= bv.GetSize()); 708 709 int todo = count_; 710 711 // optimize if the deletion goes past the end of this block... 712 int overshoot = pos_ + count_ - bv.GetSize(); 713 if (overshoot > 0) { 714 715 // first, delete blocks which are going away completely 716 while (i + 1 < _offsets.GetSize()) { 717 int nextsize = _offsets.GetAt(i + 1) - _offsets.GetAt(i); 718 if (overshoot < nextsize) 719 break; 720 todo -= nextsize; 721 overshoot -= nextsize; 722 723 // drop the block and forget it ever existed 724 for (int j = i + 1; j < z; ++j) 725 _offsets.SetAt(j, _offsets.GetAt(j) - nextsize); 726 _offsets.RemoveAt(i + 1); 727 728 _base.RemoveAt(i + 1); 729 --z; 730 c4_View bz = _pBlock(_base[z]); 731 bz.RemoveAt(i); 732 733 Validate(); 734 } 735 736 // delete before merging, to avoid useless copying 737 if (overshoot > 1) { 738 c4_View bv2 = _pBlock(_base[i + 1]); 739 bv2.RemoveAt(0, overshoot - 1); 740 todo -= overshoot - 1; 741 742 for (int j = i + 1; j < z; ++j) 743 _offsets.SetAt(j, _offsets.GetAt(j) - (overshoot - 1)); 744 745 // if the next block is filled enough, rotate the separator 746 // this avoids an expensive and unnecessary merge + split 747 if (bv2.GetSize() > kLimit / 2) { 748 c4_View bz = _pBlock(_base[z]); 749 bz[i] = bv2[0]; 750 bv2.RemoveAt(0); 751 --todo; 752 d4_assert(pos_ + todo <= bv.GetSize()); 753 d4_assert(i < _offsets.GetSize()); 754 755 for (int j = i + 1; j < z; ++j) 756 _offsets.SetAt(j, _offsets.GetAt(j) - 1); 757 } 758 } 759 760 // merge into one block 761 if (pos_ + todo > bv.GetSize()) { 762 d4_assert(i < z - 1); 763 Merge(i); 764 --z; 765 } 766 } 767 d4_assert(pos_ + todo <= bv.GetSize()); 768 769 // now remove the rows and adjust offsets 770 if (todo > 0) 771 bv.RemoveAt(pos_, todo); 772 773 for (int j = i; j < z; ++j) 774 _offsets.SetAt(j, _offsets.GetAt(j) - todo); 775 776 // if the block underflows, merge it 777 if (bv.GetSize() < kLimit / 2) { 778 if (i > 0) 779 // merge with predecessor, preferably 780 bv = _pBlock(_base[--i]); 781 if (i >= z - 1) 782 // unless there is no successor to merge with 783 return true; 784 Merge(i); 785 } 786 787 // if the block overflows, split it 788 if (bv.GetSize() > kLimit) 789 Split(i, bv.GetSize() / 2); 790 791 Validate(); 792 793 return true; 794} 795 796///////////////////////////////////////////////////////////////////////////// 797 798class c4_OrderedViewer: public c4_CustomViewer { 799 c4_View _base; 800 int _numKeys; 801 802 int KeyCompare(int row_, c4_Cursor cursor_)const; 803 804 public: 805 c4_OrderedViewer(c4_Sequence &seq_, int numKeys_); 806 virtual ~c4_OrderedViewer(); 807 808 virtual c4_View GetTemplate(); 809 virtual int GetSize(); 810 virtual int Lookup(c4_Cursor key_, int &count_); 811 virtual bool GetItem(int row_, int col_, c4_Bytes &buf_); 812 virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_); 813 virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1); 814 virtual bool RemoveRows(int pos_, int count_ = 1); 815}; 816 817///////////////////////////////////////////////////////////////////////////// 818 819c4_OrderedViewer::c4_OrderedViewer(c4_Sequence &seq_, int numKeys_): _base 820 (&seq_), _numKeys(numKeys_){} 821 822c4_OrderedViewer::~c4_OrderedViewer(){} 823 824int c4_OrderedViewer::KeyCompare(int row_, c4_Cursor cursor_)const { 825 for (int i = 0; i < _numKeys; ++i) { 826 c4_Bytes buffer; 827 _base.GetItem(row_, i, buffer); 828 829 c4_Handler &h = cursor_._seq->NthHandler(i); 830 int f = h.Compare(cursor_._index, buffer); 831 if (f != 0) 832 return f; 833 } 834 835 return 0; 836} 837 838c4_View c4_OrderedViewer::GetTemplate() { 839 return _base.Clone(); 840} 841 842int c4_OrderedViewer::GetSize() { 843 return _base.GetSize(); 844} 845 846int c4_OrderedViewer::Lookup(c4_Cursor key_, int &count_) { 847 // can only use bsearch if the properties match the query 848 // XXX with ord1.tcl (dict words), this loop takes 300 uS! 849 c4_View kv = (*key_).Container(); 850 for (int k = 0; k < _numKeys; ++k) 851 if (kv.FindProperty(_base.NthProperty(k).GetId()) < 0) 852 return - 1; 853 854#if 0 // Locate gets the count wrong, it seems 2000-06-15 855 int pos; 856 count_ = _base.Locate(*key_, &pos); 857#else 858 int pos = _base.Search(*key_); 859 count_ = pos < _base.GetSize() && KeyCompare(pos, key_) == 0 ? 1 : 0; 860#endif 861 return pos; 862} 863 864bool c4_OrderedViewer::GetItem(int row_, int col_, c4_Bytes &buf_) { 865 return _base.GetItem(row_, col_, buf_); 866} 867 868bool c4_OrderedViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) { 869 if (col_ < _numKeys) { 870 c4_Bytes temp; 871 _base.GetItem(row_, col_, temp); 872 if (buf_ == temp) 873 return true; 874 // this call will have no effect, just ignore it 875 } 876 877 _base.SetItem(row_, col_, buf_); 878 879 if (col_ < _numKeys) { 880 c4_Row copy = _base[row_]; 881 // have to remove the row because it messes up searching 882 // it would be more efficient to search *around* this row 883 // or perhaps figure out new pos before changing any data 884 RemoveRows(row_); 885 InsertRows(0, ©); // position is ignored 886 } 887 888 return true; 889} 890 891bool c4_OrderedViewer::InsertRows(int, c4_Cursor value_, int count_) { 892 d4_assert(count_ > 0); 893 894 int n; 895 int i = Lookup(value_, n); 896 897 // XXX if the lookup does not work, then insert as first element (!?) 898 d4_assert(i >= 0); 899 if (i < 0) 900 i = 0; 901 902 if (n == 0) 903 _base.InsertAt(i, *value_); 904 else { 905 d4_assert(i < _base.GetSize()); 906 _base.SetAt(i, *value_); // replace existing 907 } 908 909 return true; 910} 911 912bool c4_OrderedViewer::RemoveRows(int pos_, int count_) { 913 _base.RemoveAt(pos_, count_); 914 return true; 915} 916 917///////////////////////////////////////////////////////////////////////////// 918 919class c4_IndexedViewer: public c4_CustomViewer { 920 c4_View _base; 921 c4_View _map; 922 c4_View _props; 923 bool _unique; 924 c4_IntProp _mapProp; 925 926 int KeyCompare(int row_, c4_Cursor cursor_)const; 927 928 public: 929 c4_IndexedViewer(c4_Sequence &seq_, c4_Sequence &map_, const c4_View 930 &props_, bool unique_); 931 virtual ~c4_IndexedViewer(); 932 933 virtual c4_View GetTemplate(); 934 virtual int GetSize(); 935 virtual int Lookup(c4_Cursor key_, int &count_); 936 virtual bool GetItem(int row_, int col_, c4_Bytes &buf_); 937 virtual bool SetItem(int row_, int col_, const c4_Bytes &buf_); 938 virtual bool InsertRows(int pos_, c4_Cursor value_, int count_ = 1); 939 virtual bool RemoveRows(int pos_, int count_ = 1); 940}; 941 942///////////////////////////////////////////////////////////////////////////// 943 944c4_IndexedViewer::c4_IndexedViewer(c4_Sequence &seq_, c4_Sequence &map_, const 945 c4_View &props_, bool unique_): _base(&seq_), _map(&map_), _props(props_), 946 _unique(unique_), _mapProp((const c4_IntProp &)_map.NthProperty(0)) { 947 int n = _base.GetSize(); 948 if (_map.GetSize() != n) { 949 c4_View sorted = _base.SortOn(_props); 950 951 _map.SetSize(n); 952 for (int i = 0; i < n; ++i) 953 _mapProp(_map[i]) = _base.GetIndexOf(sorted[i]); 954 } 955} 956 957c4_IndexedViewer::~c4_IndexedViewer(){} 958 959int c4_IndexedViewer::KeyCompare(int row_, c4_Cursor cursor_)const { 960 int n = _props.NumProperties(); 961 for (int i = 0; i < n; ++i) { 962 c4_Bytes buffer; 963 _base.GetItem(row_, i, buffer); 964 965 c4_Handler &h = cursor_._seq->NthHandler(i); 966 int f = h.Compare(cursor_._index, buffer); 967 if (f != 0) 968 return f; 969 } 970 971 return 0; 972} 973 974c4_View c4_IndexedViewer::GetTemplate() { 975 return _base.Clone(); 976} 977 978int c4_IndexedViewer::GetSize() { 979 return _base.GetSize(); 980} 981 982int c4_IndexedViewer::Lookup(c4_Cursor key_, int &count_) { 983 // can only use bsearch if the properties match the query 984 // XXX with ord1.tcl (dict words), this loop takes 300 uS! 985 c4_View kv = (*key_).Container(); 986 int n = _props.NumProperties(); 987 for (int k = 0; k < n; ++k) 988 if (kv.FindProperty(_props.NthProperty(k).GetId()) < 0) 989 return - 1; 990 991#if 0 // Locate gets the count wrong, it seems 2000-06-15 992 int pos; 993 count_ = _base.Locate(*key_, &pos); 994#else 995 int pos = _base.Search(*key_); 996 count_ = pos < _base.GetSize() && KeyCompare(pos, key_) == 0 ? 1 : 0; 997#endif 998 return pos; 999} 1000 1001bool c4_IndexedViewer::GetItem(int row_, int col_, c4_Bytes &buf_) { 1002 return _base.GetItem(row_, col_, buf_); 1003} 1004 1005bool c4_IndexedViewer::SetItem(int row_, int col_, const c4_Bytes &buf_) { 1006 const int id = _base.NthProperty(col_).GetId(); 1007 const bool keyMod = _props.FindProperty(id) >= 0; 1008 1009 if (keyMod) { 1010 c4_Bytes temp; 1011 _base.GetItem(row_, col_, temp); 1012 if (buf_ == temp) 1013 return true; 1014 // this call will have no effect, just ignore it 1015 } 1016 1017 _base.SetItem(row_, col_, buf_); 1018 1019 if (keyMod) { 1020 // TODO adjust index 1021 } 1022 1023 return true; 1024} 1025 1026bool c4_IndexedViewer::InsertRows(int, c4_Cursor value_, int count_) { 1027 d4_assert(count_ > 0); 1028 1029 if (_unique) 1030 count_ = 1; 1031 1032 int n; 1033 int i = Lookup(value_, n); 1034 1035 // XXX if the lookup does not work, then insert as first element (!?) 1036 d4_assert(i >= 0); 1037 if (i < 0) 1038 i = 0; 1039 1040 if (n == 0) 1041 _base.InsertAt(i, *value_); 1042 else { 1043 d4_assert(i < _base.GetSize()); 1044 _base.SetAt(i, *value_); // replace existing 1045 } 1046 1047 return true; 1048} 1049 1050bool c4_IndexedViewer::RemoveRows(int pos_, int count_) { 1051 _base.RemoveAt(pos_, count_); 1052 1053 int n = _map.GetSize(); 1054 while (--n >= 0) { 1055 int v = _mapProp(_map[n]); 1056 if (v >= pos_) 1057 if (v < pos_ + count_) 1058 _map.RemoveAt(n); 1059 else 1060 _mapProp(_map[n]) = v - count_; 1061 } 1062 1063 return true; 1064} 1065 1066///////////////////////////////////////////////////////////////////////////// 1067 1068c4_CustomViewer *f4_CreateReadOnly(c4_Sequence &seq_) { 1069 return d4_new c4_ReadOnlyViewer(seq_); 1070} 1071 1072c4_CustomViewer *f4_CreateHash(c4_Sequence &seq_, int nk_, c4_Sequence *map_) { 1073 return d4_new c4_HashViewer(seq_, nk_, map_); 1074} 1075 1076c4_CustomViewer *f4_CreateBlocked(c4_Sequence &seq_) { 1077 return d4_new c4_BlockedViewer(seq_); 1078} 1079 1080c4_CustomViewer *f4_CreateOrdered(c4_Sequence &seq_, int nk_) { 1081 return d4_new c4_OrderedViewer(seq_, nk_); 1082} 1083 1084c4_CustomViewer *f4_CreateIndexed(c4_Sequence &seq_, c4_Sequence &map_, const 1085 c4_View &props_, bool unique_) { 1086 return d4_new c4_IndexedViewer(seq_, map_, props_, unique_); 1087} 1088 1089///////////////////////////////////////////////////////////////////////////// 1090