1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996,2008 Oracle. All rights reserved. 5 * 6 * $Id: lock_region.c,v 12.24 2008/03/13 14:41:19 mbrey Exp $ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/lock.h" 13 14static int __lock_region_init __P((ENV *, DB_LOCKTAB *)); 15static size_t 16 __lock_region_size __P((ENV *)); 17 18/* 19 * The conflict arrays are set up such that the row is the lock you are 20 * holding and the column is the lock that is desired. 21 */ 22#define DB_LOCK_RIW_N 9 23static const u_int8_t db_riw_conflicts[] = { 24/* N R W WT IW IR RIW DR WW */ 25/* N */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 26/* R */ 0, 0, 1, 0, 1, 0, 1, 0, 1, 27/* W */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 28/* WT */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 29/* IW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, 30/* IR */ 0, 0, 1, 0, 0, 0, 0, 0, 1, 31/* RIW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, 32/* DR */ 0, 0, 1, 0, 1, 0, 1, 0, 0, 33/* WW */ 0, 1, 1, 0, 1, 1, 1, 0, 1 34}; 35 36/* 37 * This conflict array is used for concurrent db access (CDB). It uses 38 * the same locks as the db_riw_conflicts array, but adds an IW mode to 39 * be used for write cursors. 40 */ 41#define DB_LOCK_CDB_N 5 42static const u_int8_t db_cdb_conflicts[] = { 43 /* N R W WT IW */ 44 /* N */ 0, 0, 0, 0, 0, 45 /* R */ 0, 0, 1, 0, 0, 46 /* W */ 0, 1, 1, 1, 1, 47 /* WT */ 0, 0, 0, 0, 0, 48 /* IW */ 0, 0, 1, 0, 1 49}; 50 51/* 52 * __lock_open -- 53 * Internal version of lock_open: only called from ENV->open. 54 * 55 * PUBLIC: int __lock_open __P((ENV *, int)); 56 */ 57int 58__lock_open(env, create_ok) 59 ENV *env; 60 int create_ok; 61{ 62 DB_ENV *dbenv; 63 DB_LOCKREGION *region; 64 DB_LOCKTAB *lt; 65 size_t size; 66 int region_locked, ret; 67 68 dbenv = env->dbenv; 69 region_locked = 0; 70 71 /* Create the lock table structure. */ 72 if ((ret = __os_calloc(env, 1, sizeof(DB_LOCKTAB), <)) != 0) 73 return (ret); 74 lt->env = env; 75 76 /* Join/create the lock region. */ 77 lt->reginfo.env = env; 78 lt->reginfo.type = REGION_TYPE_LOCK; 79 lt->reginfo.id = INVALID_REGION_ID; 80 lt->reginfo.flags = REGION_JOIN_OK; 81 if (create_ok) 82 F_SET(<->reginfo, REGION_CREATE_OK); 83 84 /* Make sure there is at least one object and lock per partition. */ 85 if (dbenv->lk_max_objects < dbenv->lk_partitions) 86 dbenv->lk_max_objects = dbenv->lk_partitions; 87 if (dbenv->lk_max < dbenv->lk_partitions) 88 dbenv->lk_max = dbenv->lk_partitions; 89 size = __lock_region_size(env); 90 if ((ret = __env_region_attach(env, <->reginfo, size)) != 0) 91 goto err; 92 93 /* If we created the region, initialize it. */ 94 if (F_ISSET(<->reginfo, REGION_CREATE)) 95 if ((ret = __lock_region_init(env, lt)) != 0) 96 goto err; 97 98 /* Set the local addresses. */ 99 region = lt->reginfo.primary = 100 R_ADDR(<->reginfo, lt->reginfo.rp->primary); 101 102 /* Set remaining pointers into region. */ 103 lt->conflicts = R_ADDR(<->reginfo, region->conf_off); 104 lt->obj_tab = R_ADDR(<->reginfo, region->obj_off); 105#ifdef HAVE_STATISTICS 106 lt->obj_stat = R_ADDR(<->reginfo, region->stat_off); 107#endif 108 lt->part_array = R_ADDR(<->reginfo, region->part_off); 109 lt->locker_tab = R_ADDR(<->reginfo, region->locker_off); 110 111 env->lk_handle = lt; 112 113 LOCK_REGION_LOCK(env); 114 region_locked = 1; 115 116 if (dbenv->lk_detect != DB_LOCK_NORUN) { 117 /* 118 * Check for incompatible automatic deadlock detection requests. 119 * There are scenarios where changing the detector configuration 120 * is reasonable, but we disallow them guessing it is likely to 121 * be an application error. 122 * 123 * We allow applications to turn on the lock detector, and we 124 * ignore attempts to set it to the default or current value. 125 */ 126 if (region->detect != DB_LOCK_NORUN && 127 dbenv->lk_detect != DB_LOCK_DEFAULT && 128 region->detect != dbenv->lk_detect) { 129 __db_errx(env, 130 "lock_open: incompatible deadlock detector mode"); 131 ret = EINVAL; 132 goto err; 133 } 134 if (region->detect == DB_LOCK_NORUN) 135 region->detect = dbenv->lk_detect; 136 } 137 138 /* 139 * A process joining the region may have reset the lock and transaction 140 * timeouts. 141 */ 142 if (dbenv->lk_timeout != 0) 143 region->lk_timeout = dbenv->lk_timeout; 144 if (dbenv->tx_timeout != 0) 145 region->tx_timeout = dbenv->tx_timeout; 146 147 LOCK_REGION_UNLOCK(env); 148 region_locked = 0; 149 150 return (0); 151 152err: env->lk_handle = NULL; 153 if (lt->reginfo.addr != NULL) { 154 if (region_locked) 155 LOCK_REGION_UNLOCK(env); 156 (void)__env_region_detach(env, <->reginfo, 0); 157 } 158 159 __os_free(env, lt); 160 return (ret); 161} 162 163/* 164 * __lock_region_init -- 165 * Initialize the lock region. 166 */ 167static int 168__lock_region_init(env, lt) 169 ENV *env; 170 DB_LOCKTAB *lt; 171{ 172 const u_int8_t *lk_conflicts; 173 struct __db_lock *lp; 174 DB_ENV *dbenv; 175 DB_LOCKER *lidp; 176 DB_LOCKOBJ *op; 177 DB_LOCKREGION *region; 178 DB_LOCKPART *part; 179 u_int32_t extra_locks, extra_objects, i, j, max; 180 u_int8_t *addr; 181 int lk_modes, ret; 182 183 dbenv = env->dbenv; 184 185 if ((ret = __env_alloc(<->reginfo, 186 sizeof(DB_LOCKREGION), <->reginfo.primary)) != 0) 187 goto mem_err; 188 lt->reginfo.rp->primary = R_OFFSET(<->reginfo, lt->reginfo.primary); 189 region = lt->reginfo.primary; 190 memset(region, 0, sizeof(*region)); 191 192 if ((ret = __mutex_alloc( 193 env, MTX_LOCK_REGION, 0, ®ion->mtx_region)) != 0) 194 return (ret); 195 196 /* Select a conflict matrix if none specified. */ 197 if (dbenv->lk_modes == 0) 198 if (CDB_LOCKING(env)) { 199 lk_modes = DB_LOCK_CDB_N; 200 lk_conflicts = db_cdb_conflicts; 201 } else { 202 lk_modes = DB_LOCK_RIW_N; 203 lk_conflicts = db_riw_conflicts; 204 } 205 else { 206 lk_modes = dbenv->lk_modes; 207 lk_conflicts = dbenv->lk_conflicts; 208 } 209 210 region->need_dd = 0; 211 timespecclear(®ion->next_timeout); 212 region->detect = DB_LOCK_NORUN; 213 region->lk_timeout = dbenv->lk_timeout; 214 region->tx_timeout = dbenv->tx_timeout; 215 region->locker_t_size = __db_tablesize(dbenv->lk_max_lockers); 216 region->object_t_size = __db_tablesize(dbenv->lk_max_objects); 217 region->part_t_size = dbenv->lk_partitions; 218 memset(®ion->stat, 0, sizeof(region->stat)); 219 region->stat.st_id = 0; 220 region->stat.st_cur_maxid = DB_LOCK_MAXID; 221 region->stat.st_maxlocks = dbenv->lk_max; 222 region->stat.st_maxlockers = dbenv->lk_max_lockers; 223 region->stat.st_maxobjects = dbenv->lk_max_objects; 224 region->stat.st_partitions = dbenv->lk_partitions; 225 region->stat.st_nmodes = lk_modes; 226 227 /* Allocate room for the conflict matrix and initialize it. */ 228 if ((ret = __env_alloc( 229 <->reginfo, (size_t)(lk_modes * lk_modes), &addr)) != 0) 230 goto mem_err; 231 memcpy(addr, lk_conflicts, (size_t)(lk_modes * lk_modes)); 232 region->conf_off = R_OFFSET(<->reginfo, addr); 233 234 /* Allocate room for the object hash table and initialize it. */ 235 if ((ret = __env_alloc(<->reginfo, 236 region->object_t_size * sizeof(DB_HASHTAB), &addr)) != 0) 237 goto mem_err; 238 __db_hashinit(addr, region->object_t_size); 239 region->obj_off = R_OFFSET(<->reginfo, addr); 240 241 /* Allocate room for the object hash stats table and initialize it. */ 242 if ((ret = __env_alloc(<->reginfo, 243 region->object_t_size * sizeof(DB_LOCK_HSTAT), &addr)) != 0) 244 goto mem_err; 245 memset(addr, 0, region->object_t_size * sizeof(DB_LOCK_HSTAT)); 246 region->stat_off = R_OFFSET(<->reginfo, addr); 247 248 /* Allocate room for the partition table and initialize its mutexes. */ 249 if ((ret = __env_alloc(<->reginfo, 250 region->part_t_size * sizeof(DB_LOCKPART), &part)) != 0) 251 goto mem_err; 252 memset(part, 0, region->part_t_size * sizeof(DB_LOCKPART)); 253 region->part_off = R_OFFSET(<->reginfo, part); 254 for (i = 0; i < region->part_t_size; i++) { 255 if ((ret = __mutex_alloc( 256 env, MTX_LOCK_REGION, 0, &part[i].mtx_part)) != 0) 257 return (ret); 258 } 259 if ((ret = __mutex_alloc( 260 env, MTX_LOCK_REGION, 0, ®ion->mtx_dd)) != 0) 261 return (ret); 262 263 if ((ret = __mutex_alloc( 264 env, MTX_LOCK_REGION, 0, ®ion->mtx_lockers)) != 0) 265 return (ret); 266 267 /* Allocate room for the locker hash table and initialize it. */ 268 if ((ret = __env_alloc(<->reginfo, 269 region->locker_t_size * sizeof(DB_HASHTAB), &addr)) != 0) 270 goto mem_err; 271 __db_hashinit(addr, region->locker_t_size); 272 region->locker_off = R_OFFSET(<->reginfo, addr); 273 274 SH_TAILQ_INIT(®ion->dd_objs); 275 276 /* 277 * If the locks and objects don't divide evenly, spread them around. 278 */ 279 extra_locks = region->stat.st_maxlocks - 280 ((region->stat.st_maxlocks / region->part_t_size) * 281 region->part_t_size); 282 extra_objects = region->stat.st_maxobjects - 283 ((region->stat.st_maxobjects / region->part_t_size) * 284 region->part_t_size); 285 for (j = 0; j < region->part_t_size; j++) { 286 /* Initialize locks onto a free list. */ 287 SH_TAILQ_INIT(&part[j].free_locks); 288 max = region->stat.st_maxlocks / region->part_t_size; 289 if (extra_locks > 0) { 290 max++; 291 extra_locks--; 292 } 293 for (i = 0; i < max; ++i) { 294 if ((ret = __env_alloc(<->reginfo, 295 sizeof(struct __db_lock), &lp)) != 0) 296 goto mem_err; 297 lp->mtx_lock = MUTEX_INVALID; 298 lp->gen = 0; 299 lp->status = DB_LSTAT_FREE; 300 SH_TAILQ_INSERT_HEAD( 301 &part[j].free_locks, lp, links, __db_lock); 302 } 303 /* Initialize objects onto a free list. */ 304 max = region->stat.st_maxobjects / region->part_t_size; 305 if (extra_objects > 0) { 306 max++; 307 extra_objects--; 308 } 309 SH_TAILQ_INIT(&part[j].free_objs); 310 for (i = 0; i < max; ++i) { 311 if ((ret = __env_alloc(<->reginfo, 312 sizeof(DB_LOCKOBJ), &op)) != 0) 313 goto mem_err; 314 SH_TAILQ_INSERT_HEAD( 315 &part[j].free_objs, op, links, __db_lockobj); 316 op->generation = 0; 317 } 318 } 319 320 /* Initialize lockers onto a free list. */ 321 SH_TAILQ_INIT(®ion->lockers); 322 SH_TAILQ_INIT(®ion->free_lockers); 323 for (i = 0; i < region->stat.st_maxlockers; ++i) { 324 if ((ret = 325 __env_alloc(<->reginfo, sizeof(DB_LOCKER), &lidp)) != 0) { 326mem_err: __db_errx(env, 327 "unable to allocate memory for the lock table"); 328 return (ret); 329 } 330 SH_TAILQ_INSERT_HEAD( 331 ®ion->free_lockers, lidp, links, __db_locker); 332 } 333 334 return (0); 335} 336 337/* 338 * __lock_env_refresh -- 339 * Clean up after the lock system on a close or failed open. 340 * 341 * PUBLIC: int __lock_env_refresh __P((ENV *)); 342 */ 343int 344__lock_env_refresh(env) 345 ENV *env; 346{ 347 struct __db_lock *lp; 348 DB_LOCKER *locker; 349 DB_LOCKOBJ *lockobj; 350 DB_LOCKREGION *lr; 351 DB_LOCKTAB *lt; 352 REGINFO *reginfo; 353 u_int32_t j; 354 int ret; 355 356 lt = env->lk_handle; 357 reginfo = <->reginfo; 358 lr = reginfo->primary; 359 360 /* 361 * If a private region, return the memory to the heap. Not needed for 362 * filesystem-backed or system shared memory regions, that memory isn't 363 * owned by any particular process. 364 */ 365 if (F_ISSET(env, ENV_PRIVATE)) { 366 /* Discard the conflict matrix. */ 367 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->conf_off)); 368 369 /* Discard the object hash table. */ 370 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->obj_off)); 371 372 /* Discard the locker hash table. */ 373 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->locker_off)); 374 375 /* Discard the object hash stat table. */ 376 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->stat_off)); 377 378 for (j = 0; j < lr->part_t_size; j++) { 379 /* Discard locks. */ 380 while ((lp = SH_TAILQ_FIRST( 381 &FREE_LOCKS(lt, j), __db_lock)) != NULL) { 382 SH_TAILQ_REMOVE(&FREE_LOCKS(lt, j), 383 lp, links, __db_lock); 384 __env_alloc_free(reginfo, lp); 385 } 386 387 /* Discard objects. */ 388 while ((lockobj = SH_TAILQ_FIRST( 389 &FREE_OBJS(lt, j), __db_lockobj)) != NULL) { 390 SH_TAILQ_REMOVE(&FREE_OBJS(lt, j), 391 lockobj, links, __db_lockobj); 392 __env_alloc_free(reginfo, lockobj); 393 } 394 } 395 396 /* Discard the object partition array. */ 397 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->part_off)); 398 399 /* Discard lockers. */ 400 while ((locker = 401 SH_TAILQ_FIRST(&lr->free_lockers, __db_locker)) != NULL) { 402 SH_TAILQ_REMOVE( 403 &lr->free_lockers, locker, links, __db_locker); 404 __env_alloc_free(reginfo, locker); 405 } 406 } 407 408 /* Detach from the region. */ 409 ret = __env_region_detach(env, reginfo, 0); 410 411 /* Discard DB_LOCKTAB. */ 412 __os_free(env, lt); 413 env->lk_handle = NULL; 414 415 return (ret); 416} 417 418/* 419 * __lock_region_mutex_count -- 420 * Return the number of mutexes the lock region will need. 421 * 422 * PUBLIC: u_int32_t __lock_region_mutex_count __P((ENV *)); 423 */ 424u_int32_t 425__lock_region_mutex_count(env) 426 ENV *env; 427{ 428 DB_ENV *dbenv; 429 430 dbenv = env->dbenv; 431 432 return (dbenv->lk_max + dbenv->lk_partitions + 3); 433} 434 435/* 436 * __lock_region_size -- 437 * Return the region size. 438 */ 439static size_t 440__lock_region_size(env) 441 ENV *env; 442{ 443 DB_ENV *dbenv; 444 size_t retval; 445 446 dbenv = env->dbenv; 447 448 /* 449 * Figure out how much space we're going to need. This list should 450 * map one-to-one with the __env_alloc calls in __lock_region_init. 451 */ 452 retval = 0; 453 retval += __env_alloc_size(sizeof(DB_LOCKREGION)); 454 retval += __env_alloc_size((size_t)(dbenv->lk_modes * dbenv->lk_modes)); 455 retval += __env_alloc_size( 456 __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_HASHTAB))); 457 retval += __env_alloc_size( 458 __db_tablesize(dbenv->lk_max_lockers) * (sizeof(DB_HASHTAB))); 459 retval += __env_alloc_size( 460 __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_LOCK_HSTAT))); 461 retval += 462 __env_alloc_size(dbenv->lk_partitions * (sizeof(DB_LOCKPART))); 463 retval += __env_alloc_size(sizeof(struct __db_lock)) * dbenv->lk_max; 464 retval += __env_alloc_size(sizeof(DB_LOCKOBJ)) * dbenv->lk_max_objects; 465 retval += __env_alloc_size(sizeof(DB_LOCKER)) * dbenv->lk_max_lockers; 466 467 /* 468 * Include 16 bytes of string space per lock. DB doesn't use it 469 * because we pre-allocate lock space for DBTs in the structure. 470 */ 471 retval += __env_alloc_size(dbenv->lk_max * 16); 472 473 /* And we keep getting this wrong, let's be generous. */ 474 retval += retval / 4; 475 476 return (retval); 477} 478