1/*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996-2009 Oracle. All rights reserved. 5 * 6 * $Id$ 7 */ 8 9#include "db_config.h" 10 11#include "db_int.h" 12#include "dbinc/lock.h" 13 14static int __lock_region_init __P((ENV *, DB_LOCKTAB *)); 15static size_t 16 __lock_region_size __P((ENV *)); 17 18/* 19 * The conflict arrays are set up such that the row is the lock you are 20 * holding and the column is the lock that is desired. 21 */ 22#define DB_LOCK_RIW_N 9 23static const u_int8_t db_riw_conflicts[] = { 24/* N R W WT IW IR RIW DR WW */ 25/* N */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 26/* R */ 0, 0, 1, 0, 1, 0, 1, 0, 1, 27/* W */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 28/* WT */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 29/* IW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, 30/* IR */ 0, 0, 1, 0, 0, 0, 0, 0, 1, 31/* RIW */ 0, 1, 1, 0, 0, 0, 0, 1, 1, 32/* DR */ 0, 0, 1, 0, 1, 0, 1, 0, 0, 33/* WW */ 0, 1, 1, 0, 1, 1, 1, 0, 1 34}; 35 36/* 37 * This conflict array is used for concurrent db access (CDB). It uses 38 * the same locks as the db_riw_conflicts array, but adds an IW mode to 39 * be used for write cursors. 40 */ 41#define DB_LOCK_CDB_N 5 42static const u_int8_t db_cdb_conflicts[] = { 43 /* N R W WT IW */ 44 /* N */ 0, 0, 0, 0, 0, 45 /* R */ 0, 0, 1, 0, 0, 46 /* W */ 0, 1, 1, 1, 1, 47 /* WT */ 0, 0, 0, 0, 0, 48 /* IW */ 0, 0, 1, 0, 1 49}; 50 51/* 52 * __lock_open -- 53 * Internal version of lock_open: only called from ENV->open. 54 * 55 * PUBLIC: int __lock_open __P((ENV *, int)); 56 */ 57int 58__lock_open(env, create_ok) 59 ENV *env; 60 int create_ok; 61{ 62 DB_ENV *dbenv; 63 DB_LOCKREGION *region; 64 DB_LOCKTAB *lt; 65 size_t size; 66 int region_locked, ret; 67 68 dbenv = env->dbenv; 69 region_locked = 0; 70 71 /* Create the lock table structure. */ 72 if ((ret = __os_calloc(env, 1, sizeof(DB_LOCKTAB), <)) != 0) 73 return (ret); 74 lt->env = env; 75 76 /* Join/create the lock region. */ 77 lt->reginfo.env = env; 78 lt->reginfo.type = REGION_TYPE_LOCK; 79 lt->reginfo.id = INVALID_REGION_ID; 80 lt->reginfo.flags = REGION_JOIN_OK; 81 if (create_ok) 82 F_SET(<->reginfo, REGION_CREATE_OK); 83 84 /* Make sure there is at least one object and lock per partition. */ 85 if (dbenv->lk_max_objects < dbenv->lk_partitions) 86 dbenv->lk_max_objects = dbenv->lk_partitions; 87 if (dbenv->lk_max < dbenv->lk_partitions) 88 dbenv->lk_max = dbenv->lk_partitions; 89 size = __lock_region_size(env); 90 if ((ret = __env_region_attach(env, <->reginfo, size)) != 0) 91 goto err; 92 93 /* If we created the region, initialize it. */ 94 if (F_ISSET(<->reginfo, REGION_CREATE)) 95 if ((ret = __lock_region_init(env, lt)) != 0) 96 goto err; 97 98 /* Set the local addresses. */ 99 region = lt->reginfo.primary = 100 R_ADDR(<->reginfo, lt->reginfo.rp->primary); 101 102 /* Set remaining pointers into region. */ 103 lt->conflicts = R_ADDR(<->reginfo, region->conf_off); 104 lt->obj_tab = R_ADDR(<->reginfo, region->obj_off); 105#ifdef HAVE_STATISTICS 106 lt->obj_stat = R_ADDR(<->reginfo, region->stat_off); 107#endif 108 lt->part_array = R_ADDR(<->reginfo, region->part_off); 109 lt->locker_tab = R_ADDR(<->reginfo, region->locker_off); 110 111 env->lk_handle = lt; 112 113 LOCK_REGION_LOCK(env); 114 region_locked = 1; 115 116 if (dbenv->lk_detect != DB_LOCK_NORUN) { 117 /* 118 * Check for incompatible automatic deadlock detection requests. 119 * There are scenarios where changing the detector configuration 120 * is reasonable, but we disallow them guessing it is likely to 121 * be an application error. 122 * 123 * We allow applications to turn on the lock detector, and we 124 * ignore attempts to set it to the default or current value. 125 */ 126 if (region->detect != DB_LOCK_NORUN && 127 dbenv->lk_detect != DB_LOCK_DEFAULT && 128 region->detect != dbenv->lk_detect) { 129 __db_errx(env, 130 "lock_open: incompatible deadlock detector mode"); 131 ret = EINVAL; 132 goto err; 133 } 134 if (region->detect == DB_LOCK_NORUN) 135 region->detect = dbenv->lk_detect; 136 } 137 138 /* 139 * A process joining the region may have reset the lock and transaction 140 * timeouts. 141 */ 142 if (dbenv->lk_timeout != 0) 143 region->lk_timeout = dbenv->lk_timeout; 144 if (dbenv->tx_timeout != 0) 145 region->tx_timeout = dbenv->tx_timeout; 146 147 LOCK_REGION_UNLOCK(env); 148 region_locked = 0; 149 150 return (0); 151 152err: if (lt->reginfo.addr != NULL) { 153 if (region_locked) 154 LOCK_REGION_UNLOCK(env); 155 (void)__env_region_detach(env, <->reginfo, 0); 156 } 157 env->lk_handle = NULL; 158 159 __os_free(env, lt); 160 return (ret); 161} 162 163/* 164 * __lock_region_init -- 165 * Initialize the lock region. 166 */ 167static int 168__lock_region_init(env, lt) 169 ENV *env; 170 DB_LOCKTAB *lt; 171{ 172 const u_int8_t *lk_conflicts; 173 struct __db_lock *lp; 174 DB_ENV *dbenv; 175 DB_LOCKER *lidp; 176 DB_LOCKOBJ *op; 177 DB_LOCKREGION *region; 178 DB_LOCKPART *part; 179 u_int32_t extra_locks, extra_objects, i, j, max; 180 u_int8_t *addr; 181 int lk_modes, ret; 182 183 dbenv = env->dbenv; 184 185 if ((ret = __env_alloc(<->reginfo, 186 sizeof(DB_LOCKREGION), <->reginfo.primary)) != 0) 187 goto mem_err; 188 lt->reginfo.rp->primary = R_OFFSET(<->reginfo, lt->reginfo.primary); 189 region = lt->reginfo.primary; 190 memset(region, 0, sizeof(*region)); 191 192 if ((ret = __mutex_alloc( 193 env, MTX_LOCK_REGION, 0, ®ion->mtx_region)) != 0) 194 return (ret); 195 196 /* Select a conflict matrix if none specified. */ 197 if (dbenv->lk_modes == 0) 198 if (CDB_LOCKING(env)) { 199 lk_modes = DB_LOCK_CDB_N; 200 lk_conflicts = db_cdb_conflicts; 201 } else { 202 lk_modes = DB_LOCK_RIW_N; 203 lk_conflicts = db_riw_conflicts; 204 } 205 else { 206 lk_modes = dbenv->lk_modes; 207 lk_conflicts = dbenv->lk_conflicts; 208 } 209 210 region->need_dd = 0; 211 timespecclear(®ion->next_timeout); 212 region->detect = DB_LOCK_NORUN; 213 region->lk_timeout = dbenv->lk_timeout; 214 region->tx_timeout = dbenv->tx_timeout; 215 region->locker_t_size = __db_tablesize(dbenv->lk_max_lockers); 216 region->object_t_size = __db_tablesize(dbenv->lk_max_objects); 217 region->part_t_size = dbenv->lk_partitions; 218 region->lock_id = 0; 219 region->cur_maxid = DB_LOCK_MAXID; 220 region->nmodes = lk_modes; 221 memset(®ion->stat, 0, sizeof(region->stat)); 222 region->stat.st_maxlocks = dbenv->lk_max; 223 region->stat.st_maxlockers = dbenv->lk_max_lockers; 224 region->stat.st_maxobjects = dbenv->lk_max_objects; 225 region->stat.st_partitions = dbenv->lk_partitions; 226 227 /* Allocate room for the conflict matrix and initialize it. */ 228 if ((ret = __env_alloc( 229 <->reginfo, (size_t)(lk_modes * lk_modes), &addr)) != 0) 230 goto mem_err; 231 memcpy(addr, lk_conflicts, (size_t)(lk_modes * lk_modes)); 232 region->conf_off = R_OFFSET(<->reginfo, addr); 233 234 /* Allocate room for the object hash table and initialize it. */ 235 if ((ret = __env_alloc(<->reginfo, 236 region->object_t_size * sizeof(DB_HASHTAB), &addr)) != 0) 237 goto mem_err; 238 __db_hashinit(addr, region->object_t_size); 239 region->obj_off = R_OFFSET(<->reginfo, addr); 240 241 /* Allocate room for the object hash stats table and initialize it. */ 242 if ((ret = __env_alloc(<->reginfo, 243 region->object_t_size * sizeof(DB_LOCK_HSTAT), &addr)) != 0) 244 goto mem_err; 245 memset(addr, 0, region->object_t_size * sizeof(DB_LOCK_HSTAT)); 246 region->stat_off = R_OFFSET(<->reginfo, addr); 247 248 /* Allocate room for the partition table and initialize its mutexes. */ 249 if ((ret = __env_alloc(<->reginfo, 250 region->part_t_size * sizeof(DB_LOCKPART), &part)) != 0) 251 goto mem_err; 252 memset(part, 0, region->part_t_size * sizeof(DB_LOCKPART)); 253 region->part_off = R_OFFSET(<->reginfo, part); 254 for (i = 0; i < region->part_t_size; i++) { 255 if ((ret = __mutex_alloc( 256 env, MTX_LOCK_REGION, 0, &part[i].mtx_part)) != 0) 257 return (ret); 258 } 259 if ((ret = __mutex_alloc( 260 env, MTX_LOCK_REGION, 0, ®ion->mtx_dd)) != 0) 261 return (ret); 262 263 if ((ret = __mutex_alloc( 264 env, MTX_LOCK_REGION, 0, ®ion->mtx_lockers)) != 0) 265 return (ret); 266 267 /* Allocate room for the locker hash table and initialize it. */ 268 if ((ret = __env_alloc(<->reginfo, 269 region->locker_t_size * sizeof(DB_HASHTAB), &addr)) != 0) 270 goto mem_err; 271 __db_hashinit(addr, region->locker_t_size); 272 region->locker_off = R_OFFSET(<->reginfo, addr); 273 274 SH_TAILQ_INIT(®ion->dd_objs); 275 276 /* 277 * If the locks and objects don't divide evenly, spread them around. 278 */ 279 extra_locks = region->stat.st_maxlocks - 280 ((region->stat.st_maxlocks / region->part_t_size) * 281 region->part_t_size); 282 extra_objects = region->stat.st_maxobjects - 283 ((region->stat.st_maxobjects / region->part_t_size) * 284 region->part_t_size); 285 for (j = 0; j < region->part_t_size; j++) { 286 /* Initialize locks onto a free list. */ 287 SH_TAILQ_INIT(&part[j].free_locks); 288 max = region->stat.st_maxlocks / region->part_t_size; 289 if (extra_locks > 0) { 290 max++; 291 extra_locks--; 292 } 293 for (i = 0; i < max; ++i) { 294 if ((ret = __env_alloc(<->reginfo, 295 sizeof(struct __db_lock), &lp)) != 0) 296 goto mem_err; 297 lp->mtx_lock = MUTEX_INVALID; 298 lp->gen = 0; 299 lp->status = DB_LSTAT_FREE; 300 SH_TAILQ_INSERT_HEAD( 301 &part[j].free_locks, lp, links, __db_lock); 302 } 303 /* Initialize objects onto a free list. */ 304 max = region->stat.st_maxobjects / region->part_t_size; 305 if (extra_objects > 0) { 306 max++; 307 extra_objects--; 308 } 309 SH_TAILQ_INIT(&part[j].free_objs); 310 for (i = 0; i < max; ++i) { 311 if ((ret = __env_alloc(<->reginfo, 312 sizeof(DB_LOCKOBJ), &op)) != 0) 313 goto mem_err; 314 SH_TAILQ_INSERT_HEAD( 315 &part[j].free_objs, op, links, __db_lockobj); 316 op->generation = 0; 317 } 318 } 319 320 /* Initialize lockers onto a free list. */ 321 SH_TAILQ_INIT(®ion->lockers); 322 SH_TAILQ_INIT(®ion->free_lockers); 323 for (i = 0; i < region->stat.st_maxlockers; ++i) { 324 if ((ret = 325 __env_alloc(<->reginfo, sizeof(DB_LOCKER), &lidp)) != 0) { 326mem_err: __db_errx(env, 327 "unable to allocate memory for the lock table"); 328 return (ret); 329 } 330 SH_TAILQ_INSERT_HEAD( 331 ®ion->free_lockers, lidp, links, __db_locker); 332 } 333 334 lt->reginfo.mtx_alloc = region->mtx_region; 335 return (0); 336} 337 338/* 339 * __lock_env_refresh -- 340 * Clean up after the lock system on a close or failed open. 341 * 342 * PUBLIC: int __lock_env_refresh __P((ENV *)); 343 */ 344int 345__lock_env_refresh(env) 346 ENV *env; 347{ 348 struct __db_lock *lp; 349 DB_LOCKER *locker; 350 DB_LOCKOBJ *lockobj; 351 DB_LOCKREGION *lr; 352 DB_LOCKTAB *lt; 353 REGINFO *reginfo; 354 u_int32_t j; 355 int ret; 356 357 lt = env->lk_handle; 358 reginfo = <->reginfo; 359 lr = reginfo->primary; 360 361 /* 362 * If a private region, return the memory to the heap. Not needed for 363 * filesystem-backed or system shared memory regions, that memory isn't 364 * owned by any particular process. 365 */ 366 if (F_ISSET(env, ENV_PRIVATE)) { 367 reginfo->mtx_alloc = MUTEX_INVALID; 368 /* Discard the conflict matrix. */ 369 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->conf_off)); 370 371 /* Discard the object hash table. */ 372 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->obj_off)); 373 374 /* Discard the locker hash table. */ 375 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->locker_off)); 376 377 /* Discard the object hash stat table. */ 378 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->stat_off)); 379 380 for (j = 0; j < lr->part_t_size; j++) { 381 /* Discard locks. */ 382 while ((lp = SH_TAILQ_FIRST( 383 &FREE_LOCKS(lt, j), __db_lock)) != NULL) { 384 SH_TAILQ_REMOVE(&FREE_LOCKS(lt, j), 385 lp, links, __db_lock); 386 __env_alloc_free(reginfo, lp); 387 } 388 389 /* Discard objects. */ 390 while ((lockobj = SH_TAILQ_FIRST( 391 &FREE_OBJS(lt, j), __db_lockobj)) != NULL) { 392 SH_TAILQ_REMOVE(&FREE_OBJS(lt, j), 393 lockobj, links, __db_lockobj); 394 __env_alloc_free(reginfo, lockobj); 395 } 396 } 397 398 /* Discard the object partition array. */ 399 __env_alloc_free(reginfo, R_ADDR(reginfo, lr->part_off)); 400 401 /* Discard lockers. */ 402 while ((locker = 403 SH_TAILQ_FIRST(&lr->free_lockers, __db_locker)) != NULL) { 404 SH_TAILQ_REMOVE( 405 &lr->free_lockers, locker, links, __db_locker); 406 __env_alloc_free(reginfo, locker); 407 } 408 } 409 410 /* Detach from the region. */ 411 ret = __env_region_detach(env, reginfo, 0); 412 413 /* Discard DB_LOCKTAB. */ 414 __os_free(env, lt); 415 env->lk_handle = NULL; 416 417 return (ret); 418} 419 420/* 421 * __lock_region_mutex_count -- 422 * Return the number of mutexes the lock region will need. 423 * 424 * PUBLIC: u_int32_t __lock_region_mutex_count __P((ENV *)); 425 */ 426u_int32_t 427__lock_region_mutex_count(env) 428 ENV *env; 429{ 430 DB_ENV *dbenv; 431 432 dbenv = env->dbenv; 433 434 return (dbenv->lk_max + dbenv->lk_partitions + 3); 435} 436 437/* 438 * __lock_region_size -- 439 * Return the region size. 440 */ 441static size_t 442__lock_region_size(env) 443 ENV *env; 444{ 445 DB_ENV *dbenv; 446 size_t retval; 447 448 dbenv = env->dbenv; 449 450 /* 451 * Figure out how much space we're going to need. This list should 452 * map one-to-one with the __env_alloc calls in __lock_region_init. 453 */ 454 retval = 0; 455 retval += __env_alloc_size(sizeof(DB_LOCKREGION)); 456 retval += __env_alloc_size((size_t)(dbenv->lk_modes * dbenv->lk_modes)); 457 retval += __env_alloc_size( 458 __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_HASHTAB))); 459 retval += __env_alloc_size( 460 __db_tablesize(dbenv->lk_max_lockers) * (sizeof(DB_HASHTAB))); 461 retval += __env_alloc_size( 462 __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_LOCK_HSTAT))); 463 retval += 464 __env_alloc_size(dbenv->lk_partitions * (sizeof(DB_LOCKPART))); 465 retval += __env_alloc_size(sizeof(struct __db_lock)) * dbenv->lk_max; 466 retval += __env_alloc_size(sizeof(DB_LOCKOBJ)) * dbenv->lk_max_objects; 467 retval += __env_alloc_size(sizeof(DB_LOCKER)) * dbenv->lk_max_lockers; 468 469 /* 470 * Include 16 bytes of string space per lock. DB doesn't use it 471 * because we pre-allocate lock space for DBTs in the structure. 472 */ 473 retval += __env_alloc_size(dbenv->lk_max * 16); 474 475 /* And we keep getting this wrong, let's be generous. */ 476 retval += retval / 4; 477 478 return (retval); 479} 480