1/* cache.c - routines to maintain an in-core cache of entries */ 2/* $OpenLDAP$ */ 3/* This work is part of OpenLDAP Software <http://www.openldap.org/>. 4 * 5 * Copyright 2000-2011 The OpenLDAP Foundation. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted only as authorized by the OpenLDAP 10 * Public License. 11 * 12 * A copy of this license is available in the file LICENSE in the 13 * top-level directory of the distribution or, alternatively, at 14 * <http://www.OpenLDAP.org/license.html>. 15 */ 16 17#include "portable.h" 18 19#include <stdio.h> 20 21#include <ac/errno.h> 22#include <ac/string.h> 23#include <ac/socket.h> 24 25#include "slap.h" 26 27#include "back-bdb.h" 28 29#include "ldap_rq.h" 30 31#ifdef BDB_HIER 32#define bdb_cache_lru_purge hdb_cache_lru_purge 33#endif 34static void bdb_cache_lru_purge( struct bdb_info *bdb ); 35 36static int bdb_cache_delete_internal(Cache *cache, EntryInfo *e, int decr); 37#ifdef LDAP_DEBUG 38#define SLAPD_UNUSED 39#ifdef SLAPD_UNUSED 40static void bdb_lru_print(Cache *cache); 41static void bdb_idtree_print(Cache *cache); 42#endif 43#endif 44 45/* For concurrency experiments only! */ 46#if 0 47#define ldap_pvt_thread_rdwr_wlock(a) 0 48#define ldap_pvt_thread_rdwr_wunlock(a) 0 49#define ldap_pvt_thread_rdwr_rlock(a) 0 50#define ldap_pvt_thread_rdwr_runlock(a) 0 51#endif 52 53#if 0 54#define ldap_pvt_thread_mutex_trylock(a) 0 55#endif 56 57static EntryInfo * 58bdb_cache_entryinfo_new( Cache *cache ) 59{ 60 EntryInfo *ei = NULL; 61 62 if ( cache->c_eifree ) { 63 ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex ); 64 if ( cache->c_eifree ) { 65 ei = cache->c_eifree; 66 cache->c_eifree = ei->bei_lrunext; 67 ei->bei_finders = 0; 68 ei->bei_lrunext = NULL; 69 } 70 ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex ); 71 } 72 if ( !ei ) { 73 ei = ch_calloc(1, sizeof(EntryInfo)); 74 ldap_pvt_thread_mutex_init( &ei->bei_kids_mutex ); 75 } 76 77 ei->bei_state = CACHE_ENTRY_REFERENCED; 78 79 return ei; 80} 81 82static void 83bdb_cache_entryinfo_free( Cache *cache, EntryInfo *ei ) 84{ 85 free( ei->bei_nrdn.bv_val ); 86 BER_BVZERO( &ei->bei_nrdn ); 87#ifdef BDB_HIER 88 free( ei->bei_rdn.bv_val ); 89 BER_BVZERO( &ei->bei_rdn ); 90 ei->bei_modrdns = 0; 91 ei->bei_ckids = 0; 92 ei->bei_dkids = 0; 93#endif 94 ei->bei_parent = NULL; 95 ei->bei_kids = NULL; 96 ei->bei_lruprev = NULL; 97 98#if 0 99 ldap_pvt_thread_mutex_lock( &cache->c_eifree_mutex ); 100 ei->bei_lrunext = cache->c_eifree; 101 cache->c_eifree = ei; 102 ldap_pvt_thread_mutex_unlock( &cache->c_eifree_mutex ); 103#else 104 ldap_pvt_thread_mutex_destroy( &ei->bei_kids_mutex ); 105 ch_free( ei ); 106#endif 107} 108 109#define LRU_DEL( c, e ) do { \ 110 if ( e == e->bei_lruprev ) { \ 111 (c)->c_lruhead = (c)->c_lrutail = NULL; \ 112 } else { \ 113 if ( e == (c)->c_lruhead ) (c)->c_lruhead = e->bei_lruprev; \ 114 if ( e == (c)->c_lrutail ) (c)->c_lrutail = e->bei_lruprev; \ 115 e->bei_lrunext->bei_lruprev = e->bei_lruprev; \ 116 e->bei_lruprev->bei_lrunext = e->bei_lrunext; \ 117 } \ 118 e->bei_lruprev = NULL; \ 119} while ( 0 ) 120 121/* Note - we now use a Second-Chance / Clock algorithm instead of 122 * Least-Recently-Used. This tremendously improves concurrency 123 * because we no longer need to manipulate the lists every time an 124 * entry is touched. We only need to lock the lists when adding 125 * or deleting an entry. It's now a circular doubly-linked list. 126 * We always append to the tail, but the head traverses the circle 127 * during a purge operation. 128 */ 129static void 130bdb_cache_lru_link( struct bdb_info *bdb, EntryInfo *ei ) 131{ 132 133 /* Already linked, ignore */ 134 if ( ei->bei_lruprev ) 135 return; 136 137 /* Insert into circular LRU list */ 138 ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex ); 139 140 ei->bei_lruprev = bdb->bi_cache.c_lrutail; 141 if ( bdb->bi_cache.c_lrutail ) { 142 ei->bei_lrunext = bdb->bi_cache.c_lrutail->bei_lrunext; 143 bdb->bi_cache.c_lrutail->bei_lrunext = ei; 144 if ( ei->bei_lrunext ) 145 ei->bei_lrunext->bei_lruprev = ei; 146 } else { 147 ei->bei_lrunext = ei->bei_lruprev = ei; 148 bdb->bi_cache.c_lruhead = ei; 149 } 150 bdb->bi_cache.c_lrutail = ei; 151 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex ); 152} 153 154#ifdef NO_THREADS 155#define NO_DB_LOCK 156#endif 157 158/* #define NO_DB_LOCK 1 */ 159/* Note: The BerkeleyDB locks are much slower than regular 160 * mutexes or rdwr locks. But the BDB implementation has the 161 * advantage of using a fixed size lock table, instead of 162 * allocating a lock object per entry in the DB. That's a 163 * key benefit for scaling. It also frees us from worrying 164 * about undetectable deadlocks between BDB activity and our 165 * own cache activity. It's still worth exploring faster 166 * alternatives though. 167 */ 168 169/* Atomically release and reacquire a lock */ 170int 171bdb_cache_entry_db_relock( 172 struct bdb_info *bdb, 173 DB_TXN *txn, 174 EntryInfo *ei, 175 int rw, 176 int tryOnly, 177 DB_LOCK *lock ) 178{ 179#ifdef NO_DB_LOCK 180 return 0; 181#else 182 int rc; 183 DBT lockobj; 184 DB_LOCKREQ list[2]; 185 186 if ( !lock ) return 0; 187 188 DBTzero( &lockobj ); 189 lockobj.data = &ei->bei_id; 190 lockobj.size = sizeof(ei->bei_id) + 1; 191 192 list[0].op = DB_LOCK_PUT; 193 list[0].lock = *lock; 194 list[1].op = DB_LOCK_GET; 195 list[1].lock = *lock; 196 list[1].mode = rw ? DB_LOCK_WRITE : DB_LOCK_READ; 197 list[1].obj = &lockobj; 198 rc = bdb->bi_dbenv->lock_vec(bdb->bi_dbenv, TXN_ID(txn), tryOnly ? DB_LOCK_NOWAIT : 0, 199 list, 2, NULL ); 200 201 if (rc && !tryOnly) { 202 Debug( LDAP_DEBUG_TRACE, 203 "bdb_cache_entry_db_relock: entry %ld, rw %d, rc %d\n", 204 ei->bei_id, rw, rc ); 205 } else { 206 *lock = list[1].lock; 207 } 208 return rc; 209#endif 210} 211 212static int 213bdb_cache_entry_db_lock( struct bdb_info *bdb, DB_TXN *txn, EntryInfo *ei, 214 int rw, int tryOnly, DB_LOCK *lock ) 215{ 216#ifdef NO_DB_LOCK 217 return 0; 218#else 219 int rc; 220 DBT lockobj; 221 int db_rw; 222 223 if ( !lock ) return 0; 224 225 if (rw) 226 db_rw = DB_LOCK_WRITE; 227 else 228 db_rw = DB_LOCK_READ; 229 230 DBTzero( &lockobj ); 231 lockobj.data = &ei->bei_id; 232 lockobj.size = sizeof(ei->bei_id) + 1; 233 234 rc = LOCK_GET(bdb->bi_dbenv, TXN_ID(txn), tryOnly ? DB_LOCK_NOWAIT : 0, 235 &lockobj, db_rw, lock); 236 if (rc && !tryOnly) { 237 Debug( LDAP_DEBUG_TRACE, 238 "bdb_cache_entry_db_lock: entry %ld, rw %d, rc %d\n", 239 ei->bei_id, rw, rc ); 240 } 241 return rc; 242#endif /* NO_DB_LOCK */ 243} 244 245int 246bdb_cache_entry_db_unlock ( struct bdb_info *bdb, DB_LOCK *lock ) 247{ 248#ifdef NO_DB_LOCK 249 return 0; 250#else 251 int rc; 252 253 if ( !lock || lock->mode == DB_LOCK_NG ) return 0; 254 255 rc = LOCK_PUT ( bdb->bi_dbenv, lock ); 256 return rc; 257#endif 258} 259 260void 261bdb_cache_return_entry_rw( struct bdb_info *bdb, Entry *e, 262 int rw, DB_LOCK *lock ) 263{ 264 EntryInfo *ei; 265 int free = 0; 266 267 ei = e->e_private; 268 if ( ei && ( ei->bei_state & CACHE_ENTRY_NOT_CACHED )) { 269 bdb_cache_entryinfo_lock( ei ); 270 if ( ei->bei_state & CACHE_ENTRY_NOT_CACHED ) { 271 /* Releasing the entry can only be done when 272 * we know that nobody else is using it, i.e we 273 * should have an entry_db writelock. But the 274 * flag is only set by the thread that loads the 275 * entry, and only if no other threads has found 276 * it while it was working. All other threads 277 * clear the flag, which mean that we should be 278 * the only thread using the entry if the flag 279 * is set here. 280 */ 281 ei->bei_e = NULL; 282 ei->bei_state ^= CACHE_ENTRY_NOT_CACHED; 283 free = 1; 284 } 285 bdb_cache_entryinfo_unlock( ei ); 286 } 287 bdb_cache_entry_db_unlock( bdb, lock ); 288 if ( free ) { 289 e->e_private = NULL; 290 bdb_entry_return( e ); 291 } 292} 293 294static int 295bdb_cache_entryinfo_destroy( EntryInfo *e ) 296{ 297 ldap_pvt_thread_mutex_destroy( &e->bei_kids_mutex ); 298 free( e->bei_nrdn.bv_val ); 299#ifdef BDB_HIER 300 free( e->bei_rdn.bv_val ); 301#endif 302 free( e ); 303 return 0; 304} 305 306/* Do a length-ordered sort on normalized RDNs */ 307static int 308bdb_rdn_cmp( const void *v_e1, const void *v_e2 ) 309{ 310 const EntryInfo *e1 = v_e1, *e2 = v_e2; 311 int rc = e1->bei_nrdn.bv_len - e2->bei_nrdn.bv_len; 312 if (rc == 0) { 313 rc = strncmp( e1->bei_nrdn.bv_val, e2->bei_nrdn.bv_val, 314 e1->bei_nrdn.bv_len ); 315 } 316 return rc; 317} 318 319static int 320bdb_id_cmp( const void *v_e1, const void *v_e2 ) 321{ 322 const EntryInfo *e1 = v_e1, *e2 = v_e2; 323 return e1->bei_id - e2->bei_id; 324} 325 326static int 327bdb_id_dup_err( void *v1, void *v2 ) 328{ 329 EntryInfo *e2 = v2; 330 e2->bei_lrunext = v1; 331 return -1; 332} 333 334/* Create an entryinfo in the cache. Caller must release the locks later. 335 */ 336static int 337bdb_entryinfo_add_internal( 338 struct bdb_info *bdb, 339 EntryInfo *ei, 340 EntryInfo **res ) 341{ 342 EntryInfo *ei2 = NULL; 343 344 *res = NULL; 345 346 ei2 = bdb_cache_entryinfo_new( &bdb->bi_cache ); 347 348 bdb_cache_entryinfo_lock( ei->bei_parent ); 349 ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); 350 351 ei2->bei_id = ei->bei_id; 352 ei2->bei_parent = ei->bei_parent; 353#ifdef BDB_HIER 354 ei2->bei_rdn = ei->bei_rdn; 355#endif 356#ifdef SLAP_ZONE_ALLOC 357 ei2->bei_bdb = bdb; 358#endif 359 360 /* Add to cache ID tree */ 361 if (avl_insert( &bdb->bi_cache.c_idtree, ei2, bdb_id_cmp, 362 bdb_id_dup_err )) { 363 EntryInfo *eix = ei2->bei_lrunext; 364 bdb_cache_entryinfo_free( &bdb->bi_cache, ei2 ); 365 ei2 = eix; 366#ifdef BDB_HIER 367 /* It got freed above because its value was 368 * assigned to ei2. 369 */ 370 ei->bei_rdn.bv_val = NULL; 371#endif 372 } else { 373 int rc; 374 375 bdb->bi_cache.c_eiused++; 376 ber_dupbv( &ei2->bei_nrdn, &ei->bei_nrdn ); 377 378 /* This is a new leaf node. But if parent had no kids, then it was 379 * a leaf and we would be decrementing that. So, only increment if 380 * the parent already has kids. 381 */ 382 if ( ei->bei_parent->bei_kids || !ei->bei_parent->bei_id ) 383 bdb->bi_cache.c_leaves++; 384 rc = avl_insert( &ei->bei_parent->bei_kids, ei2, bdb_rdn_cmp, 385 avl_dup_error ); 386#ifdef BDB_HIER 387 /* it's possible for hdb_cache_find_parent to beat us to it */ 388 if ( !rc ) { 389 ei->bei_parent->bei_ckids++; 390 } 391#endif 392 } 393 394 *res = ei2; 395 return 0; 396} 397 398/* Find the EntryInfo for the requested DN. If the DN cannot be found, return 399 * the info for its closest ancestor. *res should be NULL to process a 400 * complete DN starting from the tree root. Otherwise *res must be the 401 * immediate parent of the requested DN, and only the RDN will be searched. 402 * The EntryInfo is locked upon return and must be unlocked by the caller. 403 */ 404int 405bdb_cache_find_ndn( 406 Operation *op, 407 DB_TXN *txn, 408 struct berval *ndn, 409 EntryInfo **res ) 410{ 411 struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; 412 EntryInfo ei, *eip, *ei2; 413 int rc = 0; 414 char *ptr; 415 416 /* this function is always called with normalized DN */ 417 if ( *res ) { 418 /* we're doing a onelevel search for an RDN */ 419 ei.bei_nrdn.bv_val = ndn->bv_val; 420 ei.bei_nrdn.bv_len = dn_rdnlen( op->o_bd, ndn ); 421 eip = *res; 422 } else { 423 /* we're searching a full DN from the root */ 424 ptr = ndn->bv_val + ndn->bv_len - op->o_bd->be_nsuffix[0].bv_len; 425 ei.bei_nrdn.bv_val = ptr; 426 ei.bei_nrdn.bv_len = op->o_bd->be_nsuffix[0].bv_len; 427 /* Skip to next rdn if suffix is empty */ 428 if ( ei.bei_nrdn.bv_len == 0 ) { 429 for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val 430 && !DN_SEPARATOR(*ptr); ptr--) /* empty */; 431 if ( ptr >= ndn->bv_val ) { 432 if (DN_SEPARATOR(*ptr)) ptr++; 433 ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr; 434 ei.bei_nrdn.bv_val = ptr; 435 } 436 } 437 eip = &bdb->bi_cache.c_dntree; 438 } 439 440 for ( bdb_cache_entryinfo_lock( eip ); eip; ) { 441 eip->bei_state |= CACHE_ENTRY_REFERENCED; 442 ei.bei_parent = eip; 443 ei2 = (EntryInfo *)avl_find( eip->bei_kids, &ei, bdb_rdn_cmp ); 444 if ( !ei2 ) { 445 DBC *cursor; 446 int len = ei.bei_nrdn.bv_len; 447 448 if ( BER_BVISEMPTY( ndn )) { 449 *res = eip; 450 return LDAP_SUCCESS; 451 } 452 453 ei.bei_nrdn.bv_len = ndn->bv_len - 454 (ei.bei_nrdn.bv_val - ndn->bv_val); 455 eip->bei_finders++; 456 bdb_cache_entryinfo_unlock( eip ); 457 458 BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Reading %s", 459 ei.bei_nrdn.bv_val ); 460 461 cursor = NULL; 462 rc = bdb_dn2id( op, &ei.bei_nrdn, &ei, txn, &cursor ); 463 if (rc) { 464 bdb_cache_entryinfo_lock( eip ); 465 eip->bei_finders--; 466 if ( cursor ) cursor->c_close( cursor ); 467 *res = eip; 468 return rc; 469 } 470 471 BDB_LOG_PRINTF( bdb->bi_dbenv, NULL, "slapd Read got %s(%d)", 472 ei.bei_nrdn.bv_val, ei.bei_id ); 473 474 /* DN exists but needs to be added to cache */ 475 ei.bei_nrdn.bv_len = len; 476 rc = bdb_entryinfo_add_internal( bdb, &ei, &ei2 ); 477 /* add_internal left eip and c_rwlock locked */ 478 eip->bei_finders--; 479 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 480 if ( cursor ) cursor->c_close( cursor ); 481 if ( rc ) { 482 *res = eip; 483 return rc; 484 } 485 } 486 bdb_cache_entryinfo_lock( ei2 ); 487 if ( ei2->bei_state & CACHE_ENTRY_DELETED ) { 488 /* In the midst of deleting? Give it a chance to 489 * complete. 490 */ 491 bdb_cache_entryinfo_unlock( ei2 ); 492 bdb_cache_entryinfo_unlock( eip ); 493 ldap_pvt_thread_yield(); 494 bdb_cache_entryinfo_lock( eip ); 495 *res = eip; 496 return DB_NOTFOUND; 497 } 498 bdb_cache_entryinfo_unlock( eip ); 499 500 eip = ei2; 501 502 /* Advance to next lower RDN */ 503 for (ptr = ei.bei_nrdn.bv_val - 2; ptr > ndn->bv_val 504 && !DN_SEPARATOR(*ptr); ptr--) /* empty */; 505 if ( ptr >= ndn->bv_val ) { 506 if (DN_SEPARATOR(*ptr)) ptr++; 507 ei.bei_nrdn.bv_len = ei.bei_nrdn.bv_val - ptr - 1; 508 ei.bei_nrdn.bv_val = ptr; 509 } 510 if ( ptr < ndn->bv_val ) { 511 *res = eip; 512 break; 513 } 514 } 515 516 return rc; 517} 518 519#ifdef BDB_HIER 520/* Walk up the tree from a child node, looking for an ID that's already 521 * been linked into the cache. 522 */ 523int 524hdb_cache_find_parent( 525 Operation *op, 526 DB_TXN *txn, 527 ID id, 528 EntryInfo **res ) 529{ 530 struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; 531 EntryInfo ei, eip, *ei2 = NULL, *ein = NULL, *eir = NULL; 532 int rc, add; 533 534 ei.bei_id = id; 535 ei.bei_kids = NULL; 536 ei.bei_ckids = 0; 537 538 for (;;) { 539 rc = hdb_dn2id_parent( op, txn, &ei, &eip.bei_id ); 540 if ( rc ) break; 541 542 /* Save the previous node, if any */ 543 ei2 = ein; 544 545 /* Create a new node for the current ID */ 546 ein = bdb_cache_entryinfo_new( &bdb->bi_cache ); 547 ein->bei_id = ei.bei_id; 548 ein->bei_kids = ei.bei_kids; 549 ein->bei_nrdn = ei.bei_nrdn; 550 ein->bei_rdn = ei.bei_rdn; 551 ein->bei_ckids = ei.bei_ckids; 552#ifdef SLAP_ZONE_ALLOC 553 ein->bei_bdb = bdb; 554#endif 555 ei.bei_ckids = 0; 556 add = 1; 557 558 /* This node is not fully connected yet */ 559 ein->bei_state |= CACHE_ENTRY_NOT_LINKED; 560 561 /* If this is the first time, save this node 562 * to be returned later. 563 */ 564 if ( eir == NULL ) { 565 eir = ein; 566 ein->bei_finders++; 567 } 568 569again: 570 /* Insert this node into the ID tree */ 571 ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); 572 if ( avl_insert( &bdb->bi_cache.c_idtree, (caddr_t)ein, 573 bdb_id_cmp, bdb_id_dup_err ) ) { 574 EntryInfo *eix = ein->bei_lrunext; 575 576 if ( bdb_cache_entryinfo_trylock( eix )) { 577 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 578 ldap_pvt_thread_yield(); 579 goto again; 580 } 581 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 582 583 /* Someone else created this node just before us. 584 * Free our new copy and use the existing one. 585 */ 586 bdb_cache_entryinfo_free( &bdb->bi_cache, ein ); 587 588 /* if it was the node we were looking for, just return it */ 589 if ( eir == ein ) { 590 *res = eix; 591 rc = 0; 592 break; 593 } 594 595 ein = ei2; 596 ei2 = eix; 597 add = 0; 598 599 /* otherwise, link up what we have and return */ 600 goto gotparent; 601 } 602 603 /* If there was a previous node, link it to this one */ 604 if ( ei2 ) ei2->bei_parent = ein; 605 606 /* Look for this node's parent */ 607par2: 608 if ( eip.bei_id ) { 609 ei2 = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree, 610 (caddr_t) &eip, bdb_id_cmp ); 611 } else { 612 ei2 = &bdb->bi_cache.c_dntree; 613 } 614 if ( ei2 && bdb_cache_entryinfo_trylock( ei2 )) { 615 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 616 ldap_pvt_thread_yield(); 617 ldap_pvt_thread_rdwr_wlock( &bdb->bi_cache.c_rwlock ); 618 goto par2; 619 } 620 if ( add ) 621 bdb->bi_cache.c_eiused++; 622 if ( ei2 && ( ei2->bei_kids || !ei2->bei_id )) 623 bdb->bi_cache.c_leaves++; 624 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 625 626gotparent: 627 /* Got the parent, link in and we're done. */ 628 if ( ei2 ) { 629 bdb_cache_entryinfo_lock( eir ); 630 ein->bei_parent = ei2; 631 632 if ( avl_insert( &ei2->bei_kids, (caddr_t)ein, bdb_rdn_cmp, 633 avl_dup_error) == 0 ) 634 ei2->bei_ckids++; 635 636 /* Reset all the state info */ 637 for (ein = eir; ein != ei2; ein=ein->bei_parent) 638 ein->bei_state &= ~CACHE_ENTRY_NOT_LINKED; 639 640 bdb_cache_entryinfo_unlock( ei2 ); 641 eir->bei_finders--; 642 643 *res = eir; 644 break; 645 } 646 ei.bei_kids = NULL; 647 ei.bei_id = eip.bei_id; 648 ei.bei_ckids = 1; 649 avl_insert( &ei.bei_kids, (caddr_t)ein, bdb_rdn_cmp, 650 avl_dup_error ); 651 } 652 return rc; 653} 654 655/* Used by hdb_dn2idl when loading the EntryInfo for all the children 656 * of a given node 657 */ 658int hdb_cache_load( 659 struct bdb_info *bdb, 660 EntryInfo *ei, 661 EntryInfo **res ) 662{ 663 EntryInfo *ei2; 664 int rc; 665 666 /* See if we already have this one */ 667 bdb_cache_entryinfo_lock( ei->bei_parent ); 668 ei2 = (EntryInfo *)avl_find( ei->bei_parent->bei_kids, ei, bdb_rdn_cmp ); 669 bdb_cache_entryinfo_unlock( ei->bei_parent ); 670 671 if ( !ei2 ) { 672 /* Not found, add it */ 673 struct berval bv; 674 675 /* bei_rdn was not malloc'd before, do it now */ 676 ber_dupbv( &bv, &ei->bei_rdn ); 677 ei->bei_rdn = bv; 678 679 rc = bdb_entryinfo_add_internal( bdb, ei, res ); 680 bdb_cache_entryinfo_unlock( ei->bei_parent ); 681 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 682 } else { 683 /* Found, return it */ 684 *res = ei2; 685 return 0; 686 } 687 return rc; 688} 689#endif 690 691/* This is best-effort only. If all entries in the cache are 692 * busy, they will all be kept. This is unlikely to happen 693 * unless the cache is very much smaller than the working set. 694 */ 695static void 696bdb_cache_lru_purge( struct bdb_info *bdb ) 697{ 698 DB_LOCK lock, *lockp; 699 EntryInfo *elru, *elnext = NULL; 700 int islocked; 701 ID eicount, ecount; 702 ID count, efree, eifree = 0; 703#ifdef LDAP_DEBUG 704 int iter; 705#endif 706 707 /* Wait for the mutex; we're the only one trying to purge. */ 708 ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex ); 709 710 if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) { 711 efree = bdb->bi_cache.c_cursize - bdb->bi_cache.c_maxsize; 712 efree += bdb->bi_cache.c_minfree; 713 } else { 714 efree = 0; 715 } 716 717 /* maximum number of EntryInfo leaves to cache. In slapcat 718 * we always free all leaf nodes. 719 */ 720 721 if ( slapMode & SLAP_TOOL_READONLY ) { 722 eifree = bdb->bi_cache.c_leaves; 723 } else if ( bdb->bi_cache.c_eimax && 724 bdb->bi_cache.c_leaves > bdb->bi_cache.c_eimax ) { 725 eifree = bdb->bi_cache.c_minfree * 10; 726 if ( eifree >= bdb->bi_cache.c_leaves ) 727 eifree /= 2; 728 } 729 730 if ( !efree && !eifree ) { 731 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex ); 732 bdb->bi_cache.c_purging = 0; 733 return; 734 } 735 736 if ( bdb->bi_cache.c_txn ) { 737 lockp = &lock; 738 } else { 739 lockp = NULL; 740 } 741 742 count = 0; 743 eicount = 0; 744 ecount = 0; 745#ifdef LDAP_DEBUG 746 iter = 0; 747#endif 748 749 /* Look for an unused entry to remove */ 750 for ( elru = bdb->bi_cache.c_lruhead; elru; elru = elnext ) { 751 elnext = elru->bei_lrunext; 752 753 if ( bdb_cache_entryinfo_trylock( elru )) 754 goto bottom; 755 756 /* This flag implements the clock replacement behavior */ 757 if ( elru->bei_state & ( CACHE_ENTRY_REFERENCED )) { 758 elru->bei_state &= ~CACHE_ENTRY_REFERENCED; 759 bdb_cache_entryinfo_unlock( elru ); 760 goto bottom; 761 } 762 763 /* If this node is in the process of linking into the cache, 764 * or this node is being deleted, skip it. 765 */ 766 if (( elru->bei_state & ( CACHE_ENTRY_NOT_LINKED | 767 CACHE_ENTRY_DELETED | CACHE_ENTRY_LOADING | 768 CACHE_ENTRY_ONELEVEL )) || 769 elru->bei_finders > 0 ) { 770 bdb_cache_entryinfo_unlock( elru ); 771 goto bottom; 772 } 773 774 if ( bdb_cache_entryinfo_trylock( elru->bei_parent )) { 775 bdb_cache_entryinfo_unlock( elru ); 776 goto bottom; 777 } 778 779 /* entryinfo is locked */ 780 islocked = 1; 781 782 /* If we can successfully writelock it, then 783 * the object is idle. 784 */ 785 if ( bdb_cache_entry_db_lock( bdb, 786 bdb->bi_cache.c_txn, elru, 1, 1, lockp ) == 0 ) { 787 788 /* Free entry for this node if it's present */ 789 if ( elru->bei_e ) { 790 ecount++; 791 792 /* the cache may have gone over the limit while we 793 * weren't looking, so double check. 794 */ 795 if ( !efree && ecount > bdb->bi_cache.c_maxsize ) 796 efree = bdb->bi_cache.c_minfree; 797 798 if ( count < efree ) { 799 elru->bei_e->e_private = NULL; 800#ifdef SLAP_ZONE_ALLOC 801 bdb_entry_return( bdb, elru->bei_e, elru->bei_zseq ); 802#else 803 bdb_entry_return( elru->bei_e ); 804#endif 805 elru->bei_e = NULL; 806 count++; 807 } else { 808 /* Keep this node cached, skip to next */ 809 bdb_cache_entry_db_unlock( bdb, lockp ); 810 goto next; 811 } 812 } 813 bdb_cache_entry_db_unlock( bdb, lockp ); 814 815 /* 816 * If it is a leaf node, and we're over the limit, free it. 817 */ 818 if ( elru->bei_kids ) { 819 /* Drop from list, we ignore it... */ 820 LRU_DEL( &bdb->bi_cache, elru ); 821 } else if ( eicount < eifree ) { 822 /* Too many leaf nodes, free this one */ 823 bdb_cache_delete_internal( &bdb->bi_cache, elru, 0 ); 824 bdb_cache_delete_cleanup( &bdb->bi_cache, elru ); 825 islocked = 0; 826 eicount++; 827 } /* Leave on list until we need to free it */ 828 } 829 830next: 831 if ( islocked ) { 832 bdb_cache_entryinfo_unlock( elru ); 833 bdb_cache_entryinfo_unlock( elru->bei_parent ); 834 } 835 836 if ( count >= efree && eicount >= eifree ) 837 break; 838bottom: 839 if ( elnext == bdb->bi_cache.c_lruhead ) 840 break; 841#ifdef LDAP_DEBUG 842 iter++; 843#endif 844 } 845 846 if ( count || ecount > bdb->bi_cache.c_cursize ) { 847 ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); 848 /* HACK: we seem to be losing track, fix up now */ 849 if ( ecount > bdb->bi_cache.c_cursize ) 850 bdb->bi_cache.c_cursize = ecount; 851 bdb->bi_cache.c_cursize -= count; 852 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); 853 } 854 bdb->bi_cache.c_lruhead = elnext; 855 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex ); 856 bdb->bi_cache.c_purging = 0; 857} 858 859/* 860 * cache_find_id - find an entry in the cache, given id. 861 * The entry is locked for Read upon return. Call with flag ID_LOCKED if 862 * the supplied *eip was already locked. 863 */ 864 865int 866bdb_cache_find_id( 867 Operation *op, 868 DB_TXN *tid, 869 ID id, 870 EntryInfo **eip, 871 int flag, 872 DB_LOCK *lock ) 873{ 874 struct bdb_info *bdb = (struct bdb_info *) op->o_bd->be_private; 875 Entry *ep = NULL; 876 int rc = 0, load = 0; 877 EntryInfo ei = { 0 }; 878 879 ei.bei_id = id; 880 881#ifdef SLAP_ZONE_ALLOC 882 slap_zh_rlock(bdb->bi_cache.c_zctx); 883#endif 884 /* If we weren't given any info, see if we have it already cached */ 885 if ( !*eip ) { 886again: ldap_pvt_thread_rdwr_rlock( &bdb->bi_cache.c_rwlock ); 887 *eip = (EntryInfo *) avl_find( bdb->bi_cache.c_idtree, 888 (caddr_t) &ei, bdb_id_cmp ); 889 if ( *eip ) { 890 /* If the lock attempt fails, the info is in use */ 891 if ( bdb_cache_entryinfo_trylock( *eip )) { 892 int del = (*eip)->bei_state & CACHE_ENTRY_DELETED; 893 ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock ); 894 /* If this node is being deleted, treat 895 * as if the delete has already finished 896 */ 897 if ( del ) { 898 return DB_NOTFOUND; 899 } 900 /* otherwise, wait for the info to free up */ 901 ldap_pvt_thread_yield(); 902 goto again; 903 } 904 /* If this info isn't hooked up to its parent yet, 905 * unlock and wait for it to be fully initialized 906 */ 907 if ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) { 908 bdb_cache_entryinfo_unlock( *eip ); 909 ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock ); 910 ldap_pvt_thread_yield(); 911 goto again; 912 } 913 flag |= ID_LOCKED; 914 } 915 ldap_pvt_thread_rdwr_runlock( &bdb->bi_cache.c_rwlock ); 916 } 917 918 /* See if the ID exists in the database; add it to the cache if so */ 919 if ( !*eip ) { 920#ifndef BDB_HIER 921 rc = bdb_id2entry( op->o_bd, tid, id, &ep ); 922 if ( rc == 0 ) { 923 rc = bdb_cache_find_ndn( op, tid, 924 &ep->e_nname, eip ); 925 if ( *eip ) flag |= ID_LOCKED; 926 if ( rc ) { 927 ep->e_private = NULL; 928#ifdef SLAP_ZONE_ALLOC 929 bdb_entry_return( bdb, ep, (*eip)->bei_zseq ); 930#else 931 bdb_entry_return( ep ); 932#endif 933 ep = NULL; 934 } 935 } 936#else 937 rc = hdb_cache_find_parent(op, tid, id, eip ); 938 if ( rc == 0 ) flag |= ID_LOCKED; 939#endif 940 } 941 942 /* Ok, we found the info, do we have the entry? */ 943 if ( rc == 0 ) { 944 if ( !( flag & ID_LOCKED )) { 945 bdb_cache_entryinfo_lock( *eip ); 946 flag |= ID_LOCKED; 947 } 948 949 if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) { 950 rc = DB_NOTFOUND; 951 } else { 952 (*eip)->bei_finders++; 953 (*eip)->bei_state |= CACHE_ENTRY_REFERENCED; 954 if ( flag & ID_NOENTRY ) { 955 bdb_cache_entryinfo_unlock( *eip ); 956 return 0; 957 } 958 /* Make sure only one thread tries to load the entry */ 959load1: 960#ifdef SLAP_ZONE_ALLOC 961 if ((*eip)->bei_e && !slap_zn_validate( 962 bdb->bi_cache.c_zctx, (*eip)->bei_e, (*eip)->bei_zseq)) { 963 (*eip)->bei_e = NULL; 964 (*eip)->bei_zseq = 0; 965 } 966#endif 967 if ( !(*eip)->bei_e && !((*eip)->bei_state & CACHE_ENTRY_LOADING)) { 968 load = 1; 969 (*eip)->bei_state |= CACHE_ENTRY_LOADING; 970 flag |= ID_CHKPURGE; 971 } 972 973 if ( !load ) { 974 /* Clear the uncached state if we are not 975 * loading it, i.e it is already cached or 976 * another thread is currently loading it. 977 */ 978 if ( (*eip)->bei_state & CACHE_ENTRY_NOT_CACHED ) { 979 (*eip)->bei_state ^= CACHE_ENTRY_NOT_CACHED; 980 flag |= ID_CHKPURGE; 981 } 982 } 983 984 if ( flag & ID_LOCKED ) { 985 bdb_cache_entryinfo_unlock( *eip ); 986 flag ^= ID_LOCKED; 987 } 988 rc = bdb_cache_entry_db_lock( bdb, tid, *eip, load, 0, lock ); 989 if ( (*eip)->bei_state & CACHE_ENTRY_DELETED ) { 990 rc = DB_NOTFOUND; 991 bdb_cache_entry_db_unlock( bdb, lock ); 992 bdb_cache_entryinfo_lock( *eip ); 993 (*eip)->bei_finders--; 994 bdb_cache_entryinfo_unlock( *eip ); 995 } else if ( rc == 0 ) { 996 if ( load ) { 997 if ( !ep) { 998 rc = bdb_id2entry( op->o_bd, tid, id, &ep ); 999 } 1000 if ( rc == 0 ) { 1001 ep->e_private = *eip; 1002#ifdef BDB_HIER 1003 while ( (*eip)->bei_state & CACHE_ENTRY_NOT_LINKED ) 1004 ldap_pvt_thread_yield(); 1005 bdb_fix_dn( ep, 0 ); 1006#endif 1007 bdb_cache_entryinfo_lock( *eip ); 1008 1009 (*eip)->bei_e = ep; 1010#ifdef SLAP_ZONE_ALLOC 1011 (*eip)->bei_zseq = *((ber_len_t *)ep - 2); 1012#endif 1013 ep = NULL; 1014 if ( flag & ID_NOCACHE ) { 1015 /* Set the cached state only if no other thread 1016 * found the info while we were loading the entry. 1017 */ 1018 if ( (*eip)->bei_finders == 1 ) { 1019 (*eip)->bei_state |= CACHE_ENTRY_NOT_CACHED; 1020 flag ^= ID_CHKPURGE; 1021 } 1022 } 1023 bdb_cache_entryinfo_unlock( *eip ); 1024 bdb_cache_lru_link( bdb, *eip ); 1025 } 1026 if ( rc == 0 ) { 1027 /* If we succeeded, downgrade back to a readlock. */ 1028 rc = bdb_cache_entry_db_relock( bdb, tid, 1029 *eip, 0, 0, lock ); 1030 } else { 1031 /* Otherwise, release the lock. */ 1032 bdb_cache_entry_db_unlock( bdb, lock ); 1033 } 1034 } else if ( !(*eip)->bei_e ) { 1035 /* Some other thread is trying to load the entry, 1036 * wait for it to finish. 1037 */ 1038 bdb_cache_entry_db_unlock( bdb, lock ); 1039 bdb_cache_entryinfo_lock( *eip ); 1040 flag |= ID_LOCKED; 1041 goto load1; 1042#ifdef BDB_HIER 1043 } else { 1044 /* Check for subtree renames 1045 */ 1046 rc = bdb_fix_dn( (*eip)->bei_e, 1 ); 1047 if ( rc ) { 1048 bdb_cache_entry_db_relock( bdb, 1049 tid, *eip, 1, 0, lock ); 1050 /* check again in case other modifier did it already */ 1051 if ( bdb_fix_dn( (*eip)->bei_e, 1 ) ) 1052 rc = bdb_fix_dn( (*eip)->bei_e, 2 ); 1053 bdb_cache_entry_db_relock( bdb, 1054 tid, *eip, 0, 0, lock ); 1055 } 1056#endif 1057 } 1058 bdb_cache_entryinfo_lock( *eip ); 1059 (*eip)->bei_finders--; 1060 if ( load ) 1061 (*eip)->bei_state ^= CACHE_ENTRY_LOADING; 1062 bdb_cache_entryinfo_unlock( *eip ); 1063 } 1064 } 1065 } 1066 if ( flag & ID_LOCKED ) { 1067 bdb_cache_entryinfo_unlock( *eip ); 1068 } 1069 if ( ep ) { 1070 ep->e_private = NULL; 1071#ifdef SLAP_ZONE_ALLOC 1072 bdb_entry_return( bdb, ep, (*eip)->bei_zseq ); 1073#else 1074 bdb_entry_return( ep ); 1075#endif 1076 } 1077 if ( rc == 0 ) { 1078 int purge = 0; 1079 1080 if (( flag & ID_CHKPURGE ) || bdb->bi_cache.c_eimax ) { 1081 ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); 1082 if ( flag & ID_CHKPURGE ) { 1083 bdb->bi_cache.c_cursize++; 1084 if ( !bdb->bi_cache.c_purging && bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize ) { 1085 purge = 1; 1086 bdb->bi_cache.c_purging = 1; 1087 } 1088 } else if ( !bdb->bi_cache.c_purging && bdb->bi_cache.c_eimax && bdb->bi_cache.c_leaves > bdb->bi_cache.c_eimax ) { 1089 purge = 1; 1090 bdb->bi_cache.c_purging = 1; 1091 } 1092 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); 1093 } 1094 if ( purge ) 1095 bdb_cache_lru_purge( bdb ); 1096 } 1097 1098#ifdef SLAP_ZONE_ALLOC 1099 if (rc == 0 && (*eip)->bei_e) { 1100 slap_zn_rlock(bdb->bi_cache.c_zctx, (*eip)->bei_e); 1101 } 1102 slap_zh_runlock(bdb->bi_cache.c_zctx); 1103#endif 1104 return rc; 1105} 1106 1107int 1108bdb_cache_children( 1109 Operation *op, 1110 DB_TXN *txn, 1111 Entry *e ) 1112{ 1113 int rc; 1114 1115 if ( BEI(e)->bei_kids ) { 1116 return 0; 1117 } 1118 if ( BEI(e)->bei_state & CACHE_ENTRY_NO_KIDS ) { 1119 return DB_NOTFOUND; 1120 } 1121 rc = bdb_dn2id_children( op, txn, e ); 1122 if ( rc == DB_NOTFOUND ) { 1123 BEI(e)->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS; 1124 } 1125 return rc; 1126} 1127 1128/* Update the cache after a successful database Add. */ 1129int 1130bdb_cache_add( 1131 struct bdb_info *bdb, 1132 EntryInfo *eip, 1133 Entry *e, 1134 struct berval *nrdn, 1135 DB_TXN *txn, 1136 DB_LOCK *lock ) 1137{ 1138 EntryInfo *new, ei; 1139 int rc, purge = 0; 1140#ifdef BDB_HIER 1141 struct berval rdn = e->e_name; 1142#endif 1143 1144 ei.bei_id = e->e_id; 1145 ei.bei_parent = eip; 1146 ei.bei_nrdn = *nrdn; 1147 ei.bei_lockpad = 0; 1148 1149#if 0 1150 /* Lock this entry so that bdb_add can run to completion. 1151 * It can only fail if BDB has run out of lock resources. 1152 */ 1153 rc = bdb_cache_entry_db_lock( bdb, txn, &ei, 0, 0, lock ); 1154 if ( rc ) { 1155 bdb_cache_entryinfo_unlock( eip ); 1156 return rc; 1157 } 1158#endif 1159 1160#ifdef BDB_HIER 1161 if ( nrdn->bv_len != e->e_nname.bv_len ) { 1162 char *ptr = ber_bvchr( &rdn, ',' ); 1163 assert( ptr != NULL ); 1164 rdn.bv_len = ptr - rdn.bv_val; 1165 } 1166 ber_dupbv( &ei.bei_rdn, &rdn ); 1167 if ( eip->bei_dkids ) eip->bei_dkids++; 1168#endif 1169 1170 if (eip->bei_parent) { 1171 bdb_cache_entryinfo_lock( eip->bei_parent ); 1172 eip->bei_parent->bei_state &= ~CACHE_ENTRY_NO_GRANDKIDS; 1173 bdb_cache_entryinfo_unlock( eip->bei_parent ); 1174 } 1175 1176 rc = bdb_entryinfo_add_internal( bdb, &ei, &new ); 1177 /* bdb_csn_commit can cause this when adding the database root entry */ 1178 if ( new->bei_e ) { 1179 new->bei_e->e_private = NULL; 1180#ifdef SLAP_ZONE_ALLOC 1181 bdb_entry_return( bdb, new->bei_e, new->bei_zseq ); 1182#else 1183 bdb_entry_return( new->bei_e ); 1184#endif 1185 } 1186 new->bei_e = e; 1187 e->e_private = new; 1188 new->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS; 1189 eip->bei_state &= ~CACHE_ENTRY_NO_KIDS; 1190 bdb_cache_entryinfo_unlock( eip ); 1191 1192 ldap_pvt_thread_rdwr_wunlock( &bdb->bi_cache.c_rwlock ); 1193 ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_count_mutex ); 1194 ++bdb->bi_cache.c_cursize; 1195 if ( bdb->bi_cache.c_cursize > bdb->bi_cache.c_maxsize && 1196 !bdb->bi_cache.c_purging ) { 1197 purge = 1; 1198 bdb->bi_cache.c_purging = 1; 1199 } 1200 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_count_mutex ); 1201 1202 new->bei_finders = 1; 1203 bdb_cache_lru_link( bdb, new ); 1204 1205 if ( purge ) 1206 bdb_cache_lru_purge( bdb ); 1207 1208 return rc; 1209} 1210 1211void bdb_cache_deref( 1212 EntryInfo *ei 1213 ) 1214{ 1215 bdb_cache_entryinfo_lock( ei ); 1216 ei->bei_finders--; 1217 bdb_cache_entryinfo_unlock( ei ); 1218} 1219 1220int 1221bdb_cache_modify( 1222 struct bdb_info *bdb, 1223 Entry *e, 1224 Attribute *newAttrs, 1225 DB_TXN *txn, 1226 DB_LOCK *lock ) 1227{ 1228 EntryInfo *ei = BEI(e); 1229 int rc; 1230 /* Get write lock on data */ 1231 rc = bdb_cache_entry_db_relock( bdb, txn, ei, 1, 0, lock ); 1232 1233 /* If we've done repeated mods on a cached entry, then e_attrs 1234 * is no longer contiguous with the entry, and must be freed. 1235 */ 1236 if ( ! rc ) { 1237 if ( (void *)e->e_attrs != (void *)(e+1) ) { 1238 attrs_free( e->e_attrs ); 1239 } 1240 e->e_attrs = newAttrs; 1241 } 1242 return rc; 1243} 1244 1245/* 1246 * Change the rdn in the entryinfo. Also move to a new parent if needed. 1247 */ 1248int 1249bdb_cache_modrdn( 1250 struct bdb_info *bdb, 1251 Entry *e, 1252 struct berval *nrdn, 1253 Entry *new, 1254 EntryInfo *ein, 1255 DB_TXN *txn, 1256 DB_LOCK *lock ) 1257{ 1258 EntryInfo *ei = BEI(e), *pei; 1259 int rc; 1260#ifdef BDB_HIER 1261 struct berval rdn; 1262#endif 1263 1264 /* Get write lock on data */ 1265 rc = bdb_cache_entry_db_relock( bdb, txn, ei, 1, 0, lock ); 1266 if ( rc ) return rc; 1267 1268 /* If we've done repeated mods on a cached entry, then e_attrs 1269 * is no longer contiguous with the entry, and must be freed. 1270 */ 1271 if ( (void *)e->e_attrs != (void *)(e+1) ) { 1272 attrs_free( e->e_attrs ); 1273 } 1274 e->e_attrs = new->e_attrs; 1275 if( e->e_nname.bv_val < e->e_bv.bv_val || 1276 e->e_nname.bv_val > e->e_bv.bv_val + e->e_bv.bv_len ) 1277 { 1278 ch_free(e->e_name.bv_val); 1279 ch_free(e->e_nname.bv_val); 1280 } 1281 e->e_name = new->e_name; 1282 e->e_nname = new->e_nname; 1283 1284 /* Lock the parent's kids AVL tree */ 1285 pei = ei->bei_parent; 1286 bdb_cache_entryinfo_lock( pei ); 1287 avl_delete( &pei->bei_kids, (caddr_t) ei, bdb_rdn_cmp ); 1288 free( ei->bei_nrdn.bv_val ); 1289 ber_dupbv( &ei->bei_nrdn, nrdn ); 1290 1291#ifdef BDB_HIER 1292 free( ei->bei_rdn.bv_val ); 1293 1294 rdn = e->e_name; 1295 if ( nrdn->bv_len != e->e_nname.bv_len ) { 1296 char *ptr = ber_bvchr(&rdn, ','); 1297 assert( ptr != NULL ); 1298 rdn.bv_len = ptr - rdn.bv_val; 1299 } 1300 ber_dupbv( &ei->bei_rdn, &rdn ); 1301 1302 /* If new parent, decrement kid counts */ 1303 if ( ein ) { 1304 pei->bei_ckids--; 1305 if ( pei->bei_dkids ) { 1306 pei->bei_dkids--; 1307 if ( pei->bei_dkids < 2 ) 1308 pei->bei_state |= CACHE_ENTRY_NO_KIDS | CACHE_ENTRY_NO_GRANDKIDS; 1309 } 1310 } 1311#endif 1312 1313 if (!ein) { 1314 ein = ei->bei_parent; 1315 } else { 1316 ei->bei_parent = ein; 1317 bdb_cache_entryinfo_unlock( pei ); 1318 bdb_cache_entryinfo_lock( ein ); 1319 1320 /* new parent now has kids */ 1321 if ( ein->bei_state & CACHE_ENTRY_NO_KIDS ) 1322 ein->bei_state ^= CACHE_ENTRY_NO_KIDS; 1323 /* grandparent has grandkids */ 1324 if ( ein->bei_parent ) 1325 ein->bei_parent->bei_state &= ~CACHE_ENTRY_NO_GRANDKIDS; 1326#ifdef BDB_HIER 1327 /* parent might now have grandkids */ 1328 if ( ein->bei_state & CACHE_ENTRY_NO_GRANDKIDS && 1329 !(ei->bei_state & CACHE_ENTRY_NO_KIDS)) 1330 ein->bei_state ^= CACHE_ENTRY_NO_GRANDKIDS; 1331 1332 ein->bei_ckids++; 1333 if ( ein->bei_dkids ) ein->bei_dkids++; 1334#endif 1335 } 1336 1337#ifdef BDB_HIER 1338 /* Record the generation number of this change */ 1339 ldap_pvt_thread_mutex_lock( &bdb->bi_modrdns_mutex ); 1340 bdb->bi_modrdns++; 1341 ei->bei_modrdns = bdb->bi_modrdns; 1342 ldap_pvt_thread_mutex_unlock( &bdb->bi_modrdns_mutex ); 1343#endif 1344 1345 avl_insert( &ein->bei_kids, ei, bdb_rdn_cmp, avl_dup_error ); 1346 bdb_cache_entryinfo_unlock( ein ); 1347 return rc; 1348} 1349/* 1350 * cache_delete - delete the entry e from the cache. 1351 * 1352 * returns: 0 e was deleted ok 1353 * 1 e was not in the cache 1354 * -1 something bad happened 1355 */ 1356int 1357bdb_cache_delete( 1358 struct bdb_info *bdb, 1359 Entry *e, 1360 DB_TXN *txn, 1361 DB_LOCK *lock ) 1362{ 1363 EntryInfo *ei = BEI(e); 1364 int rc, busy = 0; 1365 1366 assert( e->e_private != NULL ); 1367 1368 /* Lock the entry's info */ 1369 bdb_cache_entryinfo_lock( ei ); 1370 1371 /* Set this early, warn off any queriers */ 1372 ei->bei_state |= CACHE_ENTRY_DELETED; 1373 1374 if (( ei->bei_state & ( CACHE_ENTRY_NOT_LINKED | 1375 CACHE_ENTRY_LOADING | CACHE_ENTRY_ONELEVEL )) || 1376 ei->bei_finders > 0 ) 1377 busy = 1; 1378 1379 bdb_cache_entryinfo_unlock( ei ); 1380 1381 while ( busy ) { 1382 ldap_pvt_thread_yield(); 1383 busy = 0; 1384 bdb_cache_entryinfo_lock( ei ); 1385 if (( ei->bei_state & ( CACHE_ENTRY_NOT_LINKED | 1386 CACHE_ENTRY_LOADING | CACHE_ENTRY_ONELEVEL )) || 1387 ei->bei_finders > 0 ) 1388 busy = 1; 1389 bdb_cache_entryinfo_unlock( ei ); 1390 } 1391 1392 /* Get write lock on the data */ 1393 rc = bdb_cache_entry_db_relock( bdb, txn, ei, 1, 0, lock ); 1394 if ( rc ) { 1395 bdb_cache_entryinfo_lock( ei ); 1396 /* couldn't lock, undo and give up */ 1397 ei->bei_state ^= CACHE_ENTRY_DELETED; 1398 bdb_cache_entryinfo_unlock( ei ); 1399 return rc; 1400 } 1401 1402 Debug( LDAP_DEBUG_TRACE, "====> bdb_cache_delete( %ld )\n", 1403 e->e_id, 0, 0 ); 1404 1405 /* set lru mutex */ 1406 ldap_pvt_thread_mutex_lock( &bdb->bi_cache.c_lru_mutex ); 1407 1408 bdb_cache_entryinfo_lock( ei->bei_parent ); 1409 bdb_cache_entryinfo_lock( ei ); 1410 rc = bdb_cache_delete_internal( &bdb->bi_cache, e->e_private, 1 ); 1411 bdb_cache_entryinfo_unlock( ei ); 1412 1413 /* free lru mutex */ 1414 ldap_pvt_thread_mutex_unlock( &bdb->bi_cache.c_lru_mutex ); 1415 1416 return( rc ); 1417} 1418 1419void 1420bdb_cache_delete_cleanup( 1421 Cache *cache, 1422 EntryInfo *ei ) 1423{ 1424 /* Enter with ei locked */ 1425 1426 /* already freed? */ 1427 if ( !ei->bei_parent ) return; 1428 1429 if ( ei->bei_e ) { 1430 ei->bei_e->e_private = NULL; 1431#ifdef SLAP_ZONE_ALLOC 1432 bdb_entry_return( ei->bei_bdb, ei->bei_e, ei->bei_zseq ); 1433#else 1434 bdb_entry_return( ei->bei_e ); 1435#endif 1436 ei->bei_e = NULL; 1437 } 1438 1439 bdb_cache_entryinfo_unlock( ei ); 1440 bdb_cache_entryinfo_free( cache, ei ); 1441} 1442 1443static int 1444bdb_cache_delete_internal( 1445 Cache *cache, 1446 EntryInfo *e, 1447 int decr ) 1448{ 1449 int rc = 0; /* return code */ 1450 int decr_leaf = 0; 1451 1452 /* already freed? */ 1453 if ( !e->bei_parent ) { 1454 assert(0); 1455 return -1; 1456 } 1457 1458#ifdef BDB_HIER 1459 e->bei_parent->bei_ckids--; 1460 if ( decr && e->bei_parent->bei_dkids ) e->bei_parent->bei_dkids--; 1461#endif 1462 /* dn tree */ 1463 if ( avl_delete( &e->bei_parent->bei_kids, (caddr_t) e, bdb_rdn_cmp ) 1464 == NULL ) 1465 { 1466 rc = -1; 1467 assert(0); 1468 } 1469 if ( e->bei_parent->bei_kids ) 1470 decr_leaf = 1; 1471 1472 ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); 1473 /* id tree */ 1474 if ( avl_delete( &cache->c_idtree, (caddr_t) e, bdb_id_cmp )) { 1475 cache->c_eiused--; 1476 if ( decr_leaf ) 1477 cache->c_leaves--; 1478 } else { 1479 rc = -1; 1480 assert(0); 1481 } 1482 ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); 1483 bdb_cache_entryinfo_unlock( e->bei_parent ); 1484 1485 if ( rc == 0 ){ 1486 /* lru */ 1487 LRU_DEL( cache, e ); 1488 1489 if ( e->bei_e ) { 1490 ldap_pvt_thread_mutex_lock( &cache->c_count_mutex ); 1491 cache->c_cursize--; 1492 ldap_pvt_thread_mutex_unlock( &cache->c_count_mutex ); 1493 } 1494 } 1495 1496 return( rc ); 1497} 1498 1499static void 1500bdb_entryinfo_release( void *data ) 1501{ 1502 EntryInfo *ei = (EntryInfo *)data; 1503 if ( ei->bei_kids ) { 1504 avl_free( ei->bei_kids, NULL ); 1505 } 1506 if ( ei->bei_e ) { 1507 ei->bei_e->e_private = NULL; 1508#ifdef SLAP_ZONE_ALLOC 1509 bdb_entry_return( ei->bei_bdb, ei->bei_e, ei->bei_zseq ); 1510#else 1511 bdb_entry_return( ei->bei_e ); 1512#endif 1513 } 1514 bdb_cache_entryinfo_destroy( ei ); 1515} 1516 1517void 1518bdb_cache_release_all( Cache *cache ) 1519{ 1520 /* set cache write lock */ 1521 ldap_pvt_thread_rdwr_wlock( &cache->c_rwlock ); 1522 /* set lru mutex */ 1523 ldap_pvt_thread_mutex_lock( &cache->c_lru_mutex ); 1524 1525 Debug( LDAP_DEBUG_TRACE, "====> bdb_cache_release_all\n", 0, 0, 0 ); 1526 1527 avl_free( cache->c_dntree.bei_kids, NULL ); 1528 avl_free( cache->c_idtree, bdb_entryinfo_release ); 1529 for (;cache->c_eifree;cache->c_eifree = cache->c_lruhead) { 1530 cache->c_lruhead = cache->c_eifree->bei_lrunext; 1531 bdb_cache_entryinfo_destroy(cache->c_eifree); 1532 } 1533 cache->c_cursize = 0; 1534 cache->c_eiused = 0; 1535 cache->c_leaves = 0; 1536 cache->c_idtree = NULL; 1537 cache->c_lruhead = NULL; 1538 cache->c_lrutail = NULL; 1539 cache->c_dntree.bei_kids = NULL; 1540 1541 /* free lru mutex */ 1542 ldap_pvt_thread_mutex_unlock( &cache->c_lru_mutex ); 1543 /* free cache write lock */ 1544 ldap_pvt_thread_rdwr_wunlock( &cache->c_rwlock ); 1545} 1546 1547#ifdef LDAP_DEBUG 1548static void 1549bdb_lru_count( Cache *cache ) 1550{ 1551 EntryInfo *e; 1552 int ei = 0, ent = 0, nc = 0; 1553 1554 for ( e = cache->c_lrutail; ; ) { 1555 ei++; 1556 if ( e->bei_e ) { 1557 ent++; 1558 if ( e->bei_state & CACHE_ENTRY_NOT_CACHED ) 1559 nc++; 1560 fprintf( stderr, "ei %d entry %p dn %s\n", ei, (void *) e->bei_e, e->bei_e->e_name.bv_val ); 1561 } 1562 e = e->bei_lrunext; 1563 if ( e == cache->c_lrutail ) 1564 break; 1565 } 1566 fprintf( stderr, "counted %d entryInfos and %d entries, %d notcached\n", 1567 ei, ent, nc ); 1568 ei = 0; 1569 for ( e = cache->c_lrutail; ; ) { 1570 ei++; 1571 e = e->bei_lruprev; 1572 if ( e == cache->c_lrutail ) 1573 break; 1574 } 1575 fprintf( stderr, "counted %d entryInfos (on lruprev)\n", ei ); 1576} 1577 1578#ifdef SLAPD_UNUSED 1579static void 1580bdb_lru_print( Cache *cache ) 1581{ 1582 EntryInfo *e; 1583 1584 fprintf( stderr, "LRU circle head: %p\n", (void *) cache->c_lruhead ); 1585 fprintf( stderr, "LRU circle (tail forward):\n" ); 1586 for ( e = cache->c_lrutail; ; ) { 1587 fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n", 1588 (void *) e, (void *) e->bei_e, e->bei_id, e->bei_nrdn.bv_val ); 1589 e = e->bei_lrunext; 1590 if ( e == cache->c_lrutail ) 1591 break; 1592 } 1593 fprintf( stderr, "LRU circle (tail backward):\n" ); 1594 for ( e = cache->c_lrutail; ; ) { 1595 fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n", 1596 (void *) e, (void *) e->bei_e, e->bei_id, e->bei_nrdn.bv_val ); 1597 e = e->bei_lruprev; 1598 if ( e == cache->c_lrutail ) 1599 break; 1600 } 1601} 1602 1603static int 1604bdb_entryinfo_print(void *data, void *arg) 1605{ 1606 EntryInfo *e = data; 1607 fprintf( stderr, "\t%p, %p id %ld rdn \"%s\"\n", 1608 (void *) e, (void *) e->bei_e, e->bei_id, e->bei_nrdn.bv_val ); 1609 return 0; 1610} 1611 1612static void 1613bdb_idtree_print(Cache *cache) 1614{ 1615 avl_apply( cache->c_idtree, bdb_entryinfo_print, NULL, -1, AVL_INORDER ); 1616} 1617#endif 1618#endif 1619 1620static void 1621bdb_reader_free( void *key, void *data ) 1622{ 1623 /* DB_ENV *env = key; */ 1624 DB_TXN *txn = data; 1625 1626 if ( txn ) TXN_ABORT( txn ); 1627} 1628 1629/* free up any keys used by the main thread */ 1630void 1631bdb_reader_flush( DB_ENV *env ) 1632{ 1633 void *data; 1634 void *ctx = ldap_pvt_thread_pool_context(); 1635 1636 if ( !ldap_pvt_thread_pool_getkey( ctx, env, &data, NULL ) ) { 1637 ldap_pvt_thread_pool_setkey( ctx, env, NULL, 0, NULL, NULL ); 1638 bdb_reader_free( env, data ); 1639 } 1640} 1641 1642int 1643bdb_reader_get( Operation *op, DB_ENV *env, DB_TXN **txn ) 1644{ 1645 int i, rc; 1646 void *data; 1647 void *ctx; 1648 1649 if ( !env || !txn ) return -1; 1650 1651 /* If no op was provided, try to find the ctx anyway... */ 1652 if ( op ) { 1653 ctx = op->o_threadctx; 1654 } else { 1655 ctx = ldap_pvt_thread_pool_context(); 1656 } 1657 1658 /* Shouldn't happen unless we're single-threaded */ 1659 if ( !ctx ) { 1660 *txn = NULL; 1661 return 0; 1662 } 1663 1664 if ( ldap_pvt_thread_pool_getkey( ctx, env, &data, NULL ) ) { 1665 for ( i=0, rc=1; rc != 0 && i<4; i++ ) { 1666 rc = TXN_BEGIN( env, NULL, txn, DB_READ_COMMITTED ); 1667 if (rc) ldap_pvt_thread_yield(); 1668 } 1669 if ( rc != 0) { 1670 return rc; 1671 } 1672 data = *txn; 1673 if ( ( rc = ldap_pvt_thread_pool_setkey( ctx, env, 1674 data, bdb_reader_free, NULL, NULL ) ) ) { 1675 TXN_ABORT( *txn ); 1676 Debug( LDAP_DEBUG_ANY, "bdb_reader_get: err %s(%d)\n", 1677 db_strerror(rc), rc, 0 ); 1678 1679 return rc; 1680 } 1681 } else { 1682 *txn = data; 1683 } 1684 return 0; 1685} 1686