/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"
#include "dbinc/mp.h"
#include "dbinc/db_page.h"
#include "dbinc/hash.h"

/*
 * __memp_env_create --
 *	Mpool specific creation of the DB_ENV structure.
 *
 * PUBLIC: int __memp_env_create __P((DB_ENV *));
 */
int
__memp_env_create(dbenv)
	DB_ENV *dbenv;
{
	/*
	 * !!!
	 * Our caller has not yet had the opportunity to reset the panic
	 * state or turn off mutex locking, and so we can neither check
	 * the panic state or acquire a mutex in the DB_ENV create path.
	 *
	 * We default to 32 8K pages.  We don't default to a flat 256K, because
	 * some systems require significantly more memory to hold 32 pages than
	 * others.  For example, HP-UX with POSIX pthreads needs 88 bytes for
	 * a POSIX pthread mutex and almost 200 bytes per buffer header, while
	 * Solaris needs 24 and 52 bytes for the same structures.  The minimum
	 * number of hash buckets is 37.  These contain a mutex also.
	 */
	dbenv->mp_bytes = dbenv->mp_max_bytes =
	    32 * ((8 * 1024) + sizeof(BH)) + 37 * sizeof(DB_MPOOL_HASH);
	dbenv->mp_ncache = 1;

	return (0);
}

/*
 * __memp_env_destroy --
 *	Mpool specific destruction of the DB_ENV structure.
 *
 * PUBLIC: void __memp_env_destroy __P((DB_ENV *));
 */
void
__memp_env_destroy(dbenv)
	DB_ENV *dbenv;
{
	/* Nothing to tear down; silence the unused-parameter warning. */
	COMPQUIET(dbenv, NULL);
}

/*
 * __memp_get_cachesize --
 *	{DB_ENV,DB}->get_cachesize.
 *
 * Returns the configured cache size as gigabytes + bytes and the number
 * of cache regions.  Any of the output pointers may be NULL.
 *
 * PUBLIC: int __memp_get_cachesize
 * PUBLIC:     __P((DB_ENV *, u_int32_t *, u_int32_t *, int *));
 */
int
__memp_get_cachesize(dbenv, gbytesp, bytesp, ncachep)
	DB_ENV *dbenv;
	u_int32_t *gbytesp, *bytesp;
	int *ncachep;
{
	ENV *env;
	MPOOL *mp;

	env = dbenv->env;

	ENV_NOT_CONFIGURED(env,
	    env->mp_handle, "DB_ENV->get_cachesize", DB_INIT_MPOOL);

	if (MPOOL_ON(env)) {
		/* Cannot be set after open, no lock required to read. */
		mp = env->mp_handle->reginfo[0].primary;
		if (gbytesp != NULL)
			*gbytesp = mp->stat.st_gbytes;
		if (bytesp != NULL)
			*bytesp = mp->stat.st_bytes;
		if (ncachep != NULL)
			*ncachep = (int)mp->nreg;
	} else {
		/* Pre-open: report the pending values stored in the DB_ENV. */
		if (gbytesp != NULL)
			*gbytesp = dbenv->mp_gbytes;
		if (bytesp != NULL)
			*bytesp = dbenv->mp_bytes;
		if (ncachep != NULL)
			*ncachep = (int)dbenv->mp_ncache;
	}
	return (0);
}

/*
 * __memp_set_cachesize --
 *	{DB_ENV,DB}->set_cachesize.
 *
 * PUBLIC: int __memp_set_cachesize __P((DB_ENV *, u_int32_t, u_int32_t, int));
 */
int
__memp_set_cachesize(dbenv, gbytes, bytes, arg_ncache)
	DB_ENV *dbenv;
	u_int32_t gbytes, bytes;
	int arg_ncache;
{
	ENV *env;
	u_int ncache;

	env = dbenv->env;

	/* Normalize the cache count. */
	ncache = arg_ncache <= 0 ? 1 : (u_int)arg_ncache;

	/*
	 * You can only store 4GB-1 in an unsigned 32-bit value, so correct for
	 * applications that specify 4GB cache sizes -- we know what they meant.
	 */
	if (sizeof(roff_t) == 4 && gbytes / ncache == 4 && bytes == 0) {
		--gbytes;
		bytes = GIGABYTE - 1;
	} else {
		/* Canonicalize so that bytes is always less than 1GB. */
		gbytes += bytes / GIGABYTE;
		bytes %= GIGABYTE;
	}

	/*
	 * !!!
	 * With 32-bit region offsets, individual cache regions must be smaller
	 * than 4GB.  Also, cache sizes larger than 10TB would cause 32-bit
	 * wrapping in the calculation of the number of hash buckets.  See
	 * __memp_open for details.
	 */
	if (!F_ISSET(env, ENV_OPEN_CALLED)) {
		if (sizeof(roff_t) <= 4 && gbytes / ncache >= 4) {
			__db_errx(env,
			    "individual cache size too large: maximum is 4GB");
			return (EINVAL);
		}
		if (gbytes / ncache > 10000) {
			__db_errx(env,
			    "individual cache size too large: maximum is 10TB");
			return (EINVAL);
		}
	}

	/*
	 * If the application requested less than 500Mb, increase the cachesize
	 * by 25% and factor in the size of the hash buckets to account for our
	 * overhead.  (I'm guessing caches over 500Mb are specifically sized,
	 * that is, it's a large server and the application actually knows how
	 * much memory is available.  We only document the 25% overhead number,
	 * not the hash buckets, but I don't see a reason to confuse the issue,
	 * it shouldn't matter to an application.)
	 *
	 * There is a minimum cache size, regardless.
	 */
	if (gbytes == 0) {
		if (bytes < 500 * MEGABYTE)
			bytes += (bytes / 4) + 37 * sizeof(DB_MPOOL_HASH);
		if (bytes / ncache < DB_CACHESIZE_MIN)
			bytes = ncache * DB_CACHESIZE_MIN;
	}

	/* After open the cache can only be resized, not re-partitioned. */
	if (F_ISSET(env, ENV_OPEN_CALLED))
		return (__memp_resize(env->mp_handle, gbytes, bytes));

	dbenv->mp_gbytes = gbytes;
	dbenv->mp_bytes = bytes;
	dbenv->mp_ncache = ncache;

	return (0);
}

/*
 * __memp_set_config --
 *	Set the cache subsystem configuration.
183 * 184 * PUBLIC: int __memp_set_config __P((DB_ENV *, u_int32_t, int)); 185 */ 186int 187__memp_set_config(dbenv, which, on) 188 DB_ENV *dbenv; 189 u_int32_t which; 190 int on; 191{ 192 DB_MPOOL *dbmp; 193 ENV *env; 194 MPOOL *mp; 195 196 env = dbenv->env; 197 198 ENV_NOT_CONFIGURED(env, 199 env->mp_handle, "DB_ENV->memp_set_config", DB_INIT_MPOOL); 200 201 switch (which) { 202 case DB_MEMP_SUPPRESS_WRITE: 203 case DB_MEMP_SYNC_INTERRUPT: 204 if (MPOOL_ON(env)) { 205 dbmp = env->mp_handle; 206 mp = dbmp->reginfo[0].primary; 207 if (on) 208 FLD_SET(mp->config_flags, which); 209 else 210 FLD_CLR(mp->config_flags, which); 211 } 212 break; 213 default: 214 return (EINVAL); 215 } 216 return (0); 217} 218 219/* 220 * __memp_get_config -- 221 * Return the cache subsystem configuration. 222 * 223 * PUBLIC: int __memp_get_config __P((DB_ENV *, u_int32_t, int *)); 224 */ 225int 226__memp_get_config(dbenv, which, onp) 227 DB_ENV *dbenv; 228 u_int32_t which; 229 int *onp; 230{ 231 DB_MPOOL *dbmp; 232 ENV *env; 233 MPOOL *mp; 234 235 env = dbenv->env; 236 237 ENV_REQUIRES_CONFIG(env, 238 env->mp_handle, "DB_ENV->memp_get_config", DB_INIT_MPOOL); 239 240 switch (which) { 241 case DB_MEMP_SUPPRESS_WRITE: 242 case DB_MEMP_SYNC_INTERRUPT: 243 if (MPOOL_ON(env)) { 244 dbmp = env->mp_handle; 245 mp = dbmp->reginfo[0].primary; 246 *onp = FLD_ISSET(mp->config_flags, which) ? 
1 : 0; 247 } else 248 *onp = 0; 249 break; 250 default: 251 return (EINVAL); 252 } 253 return (0); 254} 255 256/* 257 * PUBLIC: int __memp_get_mp_max_openfd __P((DB_ENV *, int *)); 258 */ 259int 260__memp_get_mp_max_openfd(dbenv, maxopenfdp) 261 DB_ENV *dbenv; 262 int *maxopenfdp; 263{ 264 DB_MPOOL *dbmp; 265 DB_THREAD_INFO *ip; 266 ENV *env; 267 MPOOL *mp; 268 269 env = dbenv->env; 270 271 ENV_NOT_CONFIGURED(env, 272 env->mp_handle, "DB_ENV->get_mp_max_openfd", DB_INIT_MPOOL); 273 274 if (MPOOL_ON(env)) { 275 dbmp = env->mp_handle; 276 mp = dbmp->reginfo[0].primary; 277 ENV_ENTER(env, ip); 278 MPOOL_SYSTEM_LOCK(env); 279 *maxopenfdp = mp->mp_maxopenfd; 280 MPOOL_SYSTEM_UNLOCK(env); 281 ENV_LEAVE(env, ip); 282 } else 283 *maxopenfdp = dbenv->mp_maxopenfd; 284 return (0); 285} 286 287/* 288 * __memp_set_mp_max_openfd -- 289 * Set the maximum number of open fd's when flushing the cache. 290 * PUBLIC: int __memp_set_mp_max_openfd __P((DB_ENV *, int)); 291 */ 292int 293__memp_set_mp_max_openfd(dbenv, maxopenfd) 294 DB_ENV *dbenv; 295 int maxopenfd; 296{ 297 DB_MPOOL *dbmp; 298 DB_THREAD_INFO *ip; 299 ENV *env; 300 MPOOL *mp; 301 302 env = dbenv->env; 303 304 ENV_NOT_CONFIGURED(env, 305 env->mp_handle, "DB_ENV->set_mp_max_openfd", DB_INIT_MPOOL); 306 307 if (MPOOL_ON(env)) { 308 dbmp = env->mp_handle; 309 mp = dbmp->reginfo[0].primary; 310 ENV_ENTER(env, ip); 311 MPOOL_SYSTEM_LOCK(env); 312 mp->mp_maxopenfd = maxopenfd; 313 MPOOL_SYSTEM_UNLOCK(env); 314 ENV_LEAVE(env, ip); 315 } else 316 dbenv->mp_maxopenfd = maxopenfd; 317 return (0); 318} 319 320/* 321 * PUBLIC: int __memp_get_mp_max_write __P((DB_ENV *, int *, db_timeout_t *)); 322 */ 323int 324__memp_get_mp_max_write(dbenv, maxwritep, maxwrite_sleepp) 325 DB_ENV *dbenv; 326 int *maxwritep; 327 db_timeout_t *maxwrite_sleepp; 328{ 329 DB_MPOOL *dbmp; 330 DB_THREAD_INFO *ip; 331 ENV *env; 332 MPOOL *mp; 333 334 env = dbenv->env; 335 336 ENV_NOT_CONFIGURED(env, 337 env->mp_handle, "DB_ENV->get_mp_max_write", 
DB_INIT_MPOOL); 338 339 if (MPOOL_ON(env)) { 340 dbmp = env->mp_handle; 341 mp = dbmp->reginfo[0].primary; 342 ENV_ENTER(env, ip); 343 MPOOL_SYSTEM_LOCK(env); 344 *maxwritep = mp->mp_maxwrite; 345 *maxwrite_sleepp = mp->mp_maxwrite_sleep; 346 MPOOL_SYSTEM_UNLOCK(env); 347 ENV_LEAVE(env, ip); 348 } else { 349 *maxwritep = dbenv->mp_maxwrite; 350 *maxwrite_sleepp = dbenv->mp_maxwrite_sleep; 351 } 352 return (0); 353} 354 355/* 356 * __memp_set_mp_max_write -- 357 * Set the maximum continuous I/O count. 358 * 359 * PUBLIC: int __memp_set_mp_max_write __P((DB_ENV *, int, db_timeout_t)); 360 */ 361int 362__memp_set_mp_max_write(dbenv, maxwrite, maxwrite_sleep) 363 DB_ENV *dbenv; 364 int maxwrite; 365 db_timeout_t maxwrite_sleep; 366{ 367 DB_MPOOL *dbmp; 368 DB_THREAD_INFO *ip; 369 ENV *env; 370 MPOOL *mp; 371 372 env = dbenv->env; 373 374 ENV_NOT_CONFIGURED(env, 375 env->mp_handle, "DB_ENV->get_mp_max_write", DB_INIT_MPOOL); 376 377 if (MPOOL_ON(env)) { 378 dbmp = env->mp_handle; 379 mp = dbmp->reginfo[0].primary; 380 ENV_ENTER(env, ip); 381 MPOOL_SYSTEM_LOCK(env); 382 mp->mp_maxwrite = maxwrite; 383 mp->mp_maxwrite_sleep = maxwrite_sleep; 384 MPOOL_SYSTEM_UNLOCK(env); 385 ENV_LEAVE(env, ip); 386 } else { 387 dbenv->mp_maxwrite = maxwrite; 388 dbenv->mp_maxwrite_sleep = maxwrite_sleep; 389 } 390 return (0); 391} 392 393/* 394 * PUBLIC: int __memp_get_mp_mmapsize __P((DB_ENV *, size_t *)); 395 */ 396int 397__memp_get_mp_mmapsize(dbenv, mp_mmapsizep) 398 DB_ENV *dbenv; 399 size_t *mp_mmapsizep; 400{ 401 DB_MPOOL *dbmp; 402 DB_THREAD_INFO *ip; 403 ENV *env; 404 MPOOL *mp; 405 406 env = dbenv->env; 407 408 ENV_NOT_CONFIGURED(env, 409 env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); 410 411 if (MPOOL_ON(env)) { 412 dbmp = env->mp_handle; 413 mp = dbmp->reginfo[0].primary; 414 ENV_ENTER(env, ip); 415 MPOOL_SYSTEM_LOCK(env); 416 *mp_mmapsizep = mp->mp_mmapsize; 417 MPOOL_SYSTEM_UNLOCK(env); 418 ENV_LEAVE(env, ip); 419 } else 420 *mp_mmapsizep = 
dbenv->mp_mmapsize; 421 return (0); 422} 423 424/* 425 * __memp_set_mp_mmapsize -- 426 * DB_ENV->set_mp_mmapsize. 427 * 428 * PUBLIC: int __memp_set_mp_mmapsize __P((DB_ENV *, size_t)); 429 */ 430int 431__memp_set_mp_mmapsize(dbenv, mp_mmapsize) 432 DB_ENV *dbenv; 433 size_t mp_mmapsize; 434{ 435 DB_MPOOL *dbmp; 436 DB_THREAD_INFO *ip; 437 ENV *env; 438 MPOOL *mp; 439 440 env = dbenv->env; 441 442 ENV_NOT_CONFIGURED(env, 443 env->mp_handle, "DB_ENV->set_mp_max_mmapsize", DB_INIT_MPOOL); 444 445 if (MPOOL_ON(env)) { 446 dbmp = env->mp_handle; 447 mp = dbmp->reginfo[0].primary; 448 ENV_ENTER(env, ip); 449 MPOOL_SYSTEM_LOCK(env); 450 mp->mp_mmapsize = mp_mmapsize; 451 MPOOL_SYSTEM_UNLOCK(env); 452 ENV_LEAVE(env, ip); 453 } else 454 dbenv->mp_mmapsize = mp_mmapsize; 455 return (0); 456} 457 458/* 459 * PUBLIC: int __memp_get_mp_pagesize __P((DB_ENV *, u_int32_t *)); 460 */ 461int 462__memp_get_mp_pagesize(dbenv, mp_pagesizep) 463 DB_ENV *dbenv; 464 u_int32_t *mp_pagesizep; 465{ 466 ENV *env; 467 468 env = dbenv->env; 469 470 ENV_NOT_CONFIGURED(env, 471 env->mp_handle, "DB_ENV->get_mp_max_pagesize", DB_INIT_MPOOL); 472 473 *mp_pagesizep = dbenv->mp_pagesize; 474 return (0); 475} 476 477/* 478 * __memp_set_mp_pagesize -- 479 * DB_ENV->set_mp_pagesize. 
480 * 481 * PUBLIC: int __memp_set_mp_pagesize __P((DB_ENV *, u_int32_t)); 482 */ 483int 484__memp_set_mp_pagesize(dbenv, mp_pagesize) 485 DB_ENV *dbenv; 486 u_int32_t mp_pagesize; 487{ 488 ENV *env; 489 490 env = dbenv->env; 491 492 ENV_NOT_CONFIGURED(env, 493 env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); 494 ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_pagesize"); 495 496 dbenv->mp_pagesize = mp_pagesize; 497 return (0); 498} 499 500/* 501 * PUBLIC: int __memp_get_mp_tablesize __P((DB_ENV *, u_int32_t *)); 502 */ 503int 504__memp_get_mp_tablesize(dbenv, mp_tablesizep) 505 DB_ENV *dbenv; 506 u_int32_t *mp_tablesizep; 507{ 508 ENV *env; 509 510 env = dbenv->env; 511 512 ENV_NOT_CONFIGURED(env, 513 env->mp_handle, "DB_ENV->get_mp_max_tablesize", DB_INIT_MPOOL); 514 515 *mp_tablesizep = dbenv->mp_tablesize; 516 return (0); 517} 518 519/* 520 * __memp_set_mp_tablesize -- 521 * DB_ENV->set_mp_tablesize. 522 * 523 * PUBLIC: int __memp_set_mp_tablesize __P((DB_ENV *, u_int32_t)); 524 */ 525int 526__memp_set_mp_tablesize(dbenv, mp_tablesize) 527 DB_ENV *dbenv; 528 u_int32_t mp_tablesize; 529{ 530 ENV *env; 531 532 env = dbenv->env; 533 534 ENV_NOT_CONFIGURED(env, 535 env->mp_handle, "DB_ENV->get_mp_max_mmapsize", DB_INIT_MPOOL); 536 ENV_ILLEGAL_AFTER_OPEN(env, "DB_ENV->set_mp_tablesize"); 537 538 dbenv->mp_tablesize = mp_tablesize; 539 return (0); 540} 541 542/* 543 * __memp_nameop 544 * Remove or rename a file in the pool. 545 * 546 * PUBLIC: int __memp_nameop __P((ENV *, 547 * PUBLIC: u_int8_t *, const char *, const char *, const char *, int)); 548 * 549 * XXX 550 * Undocumented interface: DB private. 
 */
int
__memp_nameop(env, fileid, newname, fullold, fullnew, inmem)
	ENV *env;
	u_int8_t *fileid;
	const char *newname, *fullold, *fullnew;
	int inmem;
{
	DB_MPOOL *dbmp;
	DB_MPOOL_HASH *hp, *nhp;
	MPOOL *mp;
	MPOOLFILE *mfp;
	roff_t newname_off;
	u_int32_t bucket;
	int locked, ret;
	size_t nlen;
	void *p;

	/* A NULL new name means this is a remove, not a rename. */
#undef	op_is_remove
#define	op_is_remove	(newname == NULL)

	COMPQUIET(bucket, 0);
	COMPQUIET(hp, NULL);
	COMPQUIET(newname_off, 0);
	COMPQUIET(nlen, 0);

	dbmp = NULL;
	mfp = NULL;
	nhp = NULL;
	p = NULL;
	locked = ret = 0;

	/* Without a cache there's nothing to update, go do the file ops. */
	if (!MPOOL_ON(env))
		goto fsop;

	dbmp = env->mp_handle;
	mp = dbmp->reginfo[0].primary;
	hp = R_ADDR(dbmp->reginfo, mp->ftab);

	/*
	 * For a rename, allocate region memory for the new name up front,
	 * before any hash-bucket mutexes are held.
	 */
	if (!op_is_remove) {
		nlen = strlen(newname);
		if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
		    NULL, nlen + 1, &newname_off, &p)) != 0)
			return (ret);
		memcpy(p, newname, nlen + 1);
	}

	/*
	 * Remove or rename a file that the mpool might know about.  We assume
	 * that the fop layer has the file locked for exclusive access, so we
	 * don't worry about locking except for the mpool mutexes.  Checkpoint
	 * can happen at any time, independent of file locking, so we have to
	 * do the actual unlink or rename system call while holding
	 * all affected buckets locked.
	 *
	 * If this is a rename and this is a memory file then we need
	 * to make sure that the new name does not exist.  Since we
	 * are locking two buckets lock them in ascending order.
	 */
	if (inmem) {
		/* In-memory files hash by name, on-disk files by fileid. */
		DB_ASSERT(env, fullold != NULL);
		hp += FNBUCKET(fullold, strlen(fullold));
		if (!op_is_remove) {
			bucket = FNBUCKET(newname, nlen);
			nhp = R_ADDR(dbmp->reginfo, mp->ftab);
			nhp += bucket;
		}
	} else
		hp += FNBUCKET(fileid, DB_FILE_ID_LEN);

	/* Lock the two buckets in ascending order to avoid deadlock. */
	if (nhp != NULL && nhp < hp)
		MUTEX_LOCK(env, nhp->mtx_hash);
	MUTEX_LOCK(env, hp->mtx_hash);
	if (nhp != NULL && nhp > hp)
		MUTEX_LOCK(env, nhp->mtx_hash);
	locked = 1;

	/* Renaming an in-memory file: fail if the target name exists. */
	if (!op_is_remove && inmem) {
		SH_TAILQ_FOREACH(mfp, &nhp->hash_bucket, q, __mpoolfile)
			if (!mfp->deadfile &&
			    mfp->no_backing_file && strcmp(newname,
			    R_ADDR(dbmp->reginfo, mfp->path_off)) == 0)
				break;
		if (mfp != NULL) {
			ret = EEXIST;
			goto err;
		}
	}

	/*
	 * Find the file -- if mpool doesn't know about this file, that may
	 * not be an error.
	 */
	SH_TAILQ_FOREACH(mfp, &hp->hash_bucket, q, __mpoolfile) {
		/* Ignore non-active files. */
		if (mfp->deadfile || F_ISSET(mfp, MP_TEMP))
			continue;

		/* Try to match on fileid. */
		if (memcmp(fileid, R_ADDR(
		    dbmp->reginfo, mfp->fileid_off), DB_FILE_ID_LEN) != 0)
			continue;

		break;
	}

	if (mfp == NULL) {
		/* In-memory files exist only in the pool: not found is fatal. */
		if (inmem) {
			ret = ENOENT;
			goto err;
		}
		goto fsop;
	}

	if (op_is_remove) {
		MUTEX_LOCK(env, mfp->mutex);
		/*
		 * In-memory dbs have an artificially incremented ref count so
		 * they do not get reclaimed as long as they exist.  Since we
		 * are now deleting the database, we need to dec that count.
		 */
		if (mfp->no_backing_file)
			mfp->mpf_cnt--;
		mfp->deadfile = 1;
		MUTEX_UNLOCK(env, mfp->mutex);
	} else {
		/*
		 * Else, it's a rename.  We've allocated memory for the new
		 * name.  Swap it with the old one.  If it's in memory we
		 * need to move it the right bucket.
		 */
		p = R_ADDR(dbmp->reginfo, mfp->path_off);
		mfp->path_off = newname_off;

		if (inmem && hp != nhp) {
			DB_ASSERT(env, nhp != NULL);
			SH_TAILQ_REMOVE(&hp->hash_bucket, mfp, q, __mpoolfile);
			mfp->bucket = bucket;
			SH_TAILQ_INSERT_TAIL(&nhp->hash_bucket, mfp, q);
		}
	}

fsop:	/*
	 * If this is a real file, then mfp could be NULL, because
	 * mpool isn't turned on, and we still need to do the file ops.
	 */
	if (mfp == NULL || !mfp->no_backing_file) {
		if (op_is_remove) {
			/*
			 * !!!
			 * Replication may ask us to unlink a file that's been
			 * renamed.  Don't complain if it doesn't exist.
			 */
			if ((ret = __os_unlink(env, fullold, 0)) == ENOENT)
				ret = 0;
		} else {
			/*
			 * Defensive only, fullnew should never be
			 * NULL.
			 */
			DB_ASSERT(env, fullnew != NULL);
			if (fullnew == NULL) {
				ret = EINVAL;
				goto err;
			}
			ret = __os_rename(env, fullold, fullnew, 1);
		}
	}

	/*
	 * Delete the memory we no longer need: either the old name on a
	 * successful rename, or the never-installed new name on error.
	 */
err:	if (p != NULL) {
		MPOOL_REGION_LOCK(env, &dbmp->reginfo[0]);
		__memp_free(&dbmp->reginfo[0], p);
		MPOOL_REGION_UNLOCK(env, &dbmp->reginfo[0]);
	}

	/* If we have buckets locked, unlock them when done moving files. */
	if (locked == 1) {
		MUTEX_UNLOCK(env, hp->mtx_hash);
		if (nhp != NULL && nhp != hp)
			MUTEX_UNLOCK(env, nhp->mtx_hash);
	}
	return (ret);
}

/*
 * __memp_ftruncate __
 *	Truncate the file.
 *
 * PUBLIC: int __memp_ftruncate __P((DB_MPOOLFILE *, DB_TXN *,
 * PUBLIC:     DB_THREAD_INFO *, db_pgno_t, u_int32_t));
 */
int
__memp_ftruncate(dbmfp, txn, ip, pgno, flags)
	DB_MPOOLFILE *dbmfp;
	DB_TXN *txn;
	DB_THREAD_INFO *ip;
	db_pgno_t pgno;
	u_int32_t flags;
{
	ENV *env;
	MPOOLFILE *mfp;
	void *pagep;
	db_pgno_t last_pgno, pg;
	int ret;

	env = dbmfp->env;
	mfp = dbmfp->mfp;
	ret = 0;

	/* Snapshot the current last page under the file mutex. */
	MUTEX_LOCK(env, mfp->mutex);
	last_pgno = mfp->last_pgno;
	MUTEX_UNLOCK(env, mfp->mutex);

	if (pgno > last_pgno) {
		/* During recovery a too-large truncate point is benign. */
		if (LF_ISSET(MP_TRUNC_RECOVER))
			return (0);
		__db_errx(env, "Truncate beyond the end of file");
		return (EINVAL);
	}

	/*
	 * Discard any cached copies of the truncated pages.  Stop early if
	 * the file has no buffers left in the pool at all.
	 */
	pg = pgno;
	do {
		if (mfp->block_cnt == 0)
			break;
		if ((ret = __memp_fget(dbmfp, &pg,
		    ip, txn, DB_MPOOL_FREE, &pagep)) != 0)
			return (ret);
	} while (pg++ < last_pgno);

	/*
	 * If we are aborting an extend of a file, the call to __os_truncate
	 * could extend the file if the new page(s) had not yet been
	 * written to disk.  We do not want to extend the file to pages
	 * whose log records are not yet flushed [#14031].  In addition if
	 * we are out of disk space we can generate an error [#12743].
	 */
	MUTEX_LOCK(env, mfp->mutex);
	if (!F_ISSET(mfp, MP_TEMP) &&
	    !mfp->no_backing_file && pgno <= mfp->last_flushed_pgno)
#ifdef HAVE_FTRUNCATE
		ret = __os_truncate(env,
		    dbmfp->fhp, pgno, mfp->stat.st_pagesize);
#else
		/* No ftruncate: zero out the tail instead of shrinking. */
		ret = __db_zero_extend(env,
		    dbmfp->fhp, pgno, mfp->last_pgno, mfp->stat.st_pagesize);
#endif

	/*
	 * This set could race with another thread of control that extending
	 * the file.  It's not a problem because we should have the page
	 * locked at a higher level of the system.
	 */
	if (ret == 0) {
		mfp->last_pgno = pgno - 1;
		if (mfp->last_flushed_pgno > mfp->last_pgno)
			mfp->last_flushed_pgno = mfp->last_pgno;
	}
	MUTEX_UNLOCK(env, mfp->mutex);

	return (ret);
}

#ifdef HAVE_FTRUNCATE
/*
 * Support routines for maintaining a sorted freelist while we try to rearrange
 * and truncate the file.
 */

/*
 * __memp_alloc_freelist --
 *	Allocate mpool space for the freelist.
 *
 * PUBLIC: int __memp_alloc_freelist __P((DB_MPOOLFILE *,
 * PUBLIC:     u_int32_t, db_pgno_t **));
 */
int
__memp_alloc_freelist(dbmfp, nelems, listp)
	DB_MPOOLFILE *dbmfp;
	u_int32_t nelems;
	db_pgno_t **listp;
{
	DB_MPOOL *dbmp;
	ENV *env;
	MPOOLFILE *mfp;
	void *retp;
	int ret;

	env = dbmfp->env;
	dbmp = env->mp_handle;
	mfp = dbmfp->mfp;

	*listp = NULL;

	/*
	 * These fields are protected because the database layer
	 * has the metapage locked while manipulating them.
	 */
	mfp->free_ref++;
	/* A non-zero size means another handle already owns a freelist. */
	if (mfp->free_size != 0)
		return (EBUSY);

	/*
	 * Allocate at least a few slots.  free_cnt records the caller's
	 * actual element count; nelems is only bumped for the allocation.
	 */
	mfp->free_cnt = nelems;
	if (nelems == 0)
		nelems = 50;

	if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
	    NULL, nelems * sizeof(db_pgno_t), &mfp->free_list, &retp)) != 0)
		return (ret);

	mfp->free_size = nelems * sizeof(db_pgno_t);
	*listp = retp;
	return (0);
}

/*
 * __memp_free_freelist --
 *	Free the list.
 *
 * PUBLIC: int __memp_free_freelist __P((DB_MPOOLFILE *));
 */
int
__memp_free_freelist(dbmfp)
	DB_MPOOLFILE *dbmfp;
{
	DB_MPOOL *dbmp;
	ENV *env;
	MPOOLFILE *mfp;

	env = dbmfp->env;
	dbmp = env->mp_handle;
	mfp = dbmfp->mfp;

	/* Only the last reference actually releases the region memory. */
	DB_ASSERT(env, mfp->free_ref > 0);
	if (--mfp->free_ref > 0)
		return (0);

	DB_ASSERT(env, mfp->free_size != 0);

	MPOOL_SYSTEM_LOCK(env);
	__memp_free(dbmp->reginfo, R_ADDR(dbmp->reginfo, mfp->free_list));
	MPOOL_SYSTEM_UNLOCK(env);

	mfp->free_cnt = 0;
	mfp->free_list = 0;
	mfp->free_size = 0;
	return (0);
}

/*
 * __memp_get_freelist --
 *	Return current list.
 *
 * PUBLIC: int __memp_get_freelist __P((
 * PUBLIC:	DB_MPOOLFILE *, u_int32_t *, db_pgno_t **));
 */
int
__memp_get_freelist(dbmfp, nelemp, listp)
	DB_MPOOLFILE *dbmfp;
	u_int32_t *nelemp;
	db_pgno_t **listp;
{
	DB_MPOOL *dbmp;
	ENV *env;
	MPOOLFILE *mfp;

	env = dbmfp->env;
	dbmp = env->mp_handle;
	mfp = dbmfp->mfp;

	/* No allocated freelist: report an empty one. */
	if (mfp->free_size == 0) {
		*nelemp = 0;
		*listp = NULL;
	} else {
		*nelemp = mfp->free_cnt;
		*listp = R_ADDR(dbmp->reginfo, mfp->free_list);
	}

	return (0);
}

/*
 * __memp_extend_freelist --
 *	Extend the list.
 *
 * PUBLIC: int __memp_extend_freelist __P((
 * PUBLIC:	DB_MPOOLFILE *, u_int32_t , db_pgno_t **));
 */
int
__memp_extend_freelist(dbmfp, count, listp)
	DB_MPOOLFILE *dbmfp;
	u_int32_t count;
	db_pgno_t **listp;
{
	DB_MPOOL *dbmp;
	ENV *env;
	MPOOLFILE *mfp;
	int ret;
	void *retp;

	env = dbmfp->env;
	dbmp = env->mp_handle;
	mfp = dbmfp->mfp;

	/* The freelist must have been allocated via __memp_alloc_freelist. */
	if (mfp->free_size == 0)
		return (EINVAL);

	if (count * sizeof(db_pgno_t) > mfp->free_size) {
		/* Grow in 512-byte-aligned chunks. */
		mfp->free_size =
		    (size_t)DB_ALIGN(count * sizeof(db_pgno_t), 512);
		/* Remember the old list so we can copy and free it. */
		*listp = R_ADDR(dbmp->reginfo, mfp->free_list);
		if ((ret = __memp_alloc(dbmp, dbmp->reginfo,
		    NULL, mfp->free_size, &mfp->free_list, &retp)) != 0)
			return (ret);

		memcpy(retp, *listp, mfp->free_cnt * sizeof(db_pgno_t));

		MPOOL_SYSTEM_LOCK(env);
		__memp_free(dbmp->reginfo, *listp);
		MPOOL_SYSTEM_UNLOCK(env);
	}

	mfp->free_cnt = count;
	*listp = R_ADDR(dbmp->reginfo, mfp->free_list);

	return (0);
}
#endif

/*
 * __memp_set_last_pgno -- set the last page of the file
 *
 * PUBLIC: void __memp_set_last_pgno __P((DB_MPOOLFILE *, db_pgno_t));
 */
void
__memp_set_last_pgno(dbmfp, pgno)
	DB_MPOOLFILE *dbmfp;
	db_pgno_t pgno;
{
	/*
	 * NOTE(review): written without holding mfp->mutex -- presumably
	 * callers hold an exclusive lock at a higher level; confirm.
	 */
	dbmfp->mfp->last_pgno = pgno;
}