1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: lock_region.c,v 12.24 2008/03/13 14:41:19 mbrey Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/lock.h"
13
14static int  __lock_region_init __P((ENV *, DB_LOCKTAB *));
15static size_t
16	    __lock_region_size __P((ENV *));
17
18/*
19 * The conflict arrays are set up such that the row is the lock you are
20 * holding and the column is the lock that is desired.
21 */
22#define	DB_LOCK_RIW_N	9
23static const u_int8_t db_riw_conflicts[] = {
24/*         N   R   W   WT  IW  IR  RIW DR  WW */
25/*   N */  0,  0,  0,  0,  0,  0,  0,  0,  0,
26/*   R */  0,  0,  1,  0,  1,  0,  1,  0,  1,
27/*   W */  0,  1,  1,  1,  1,  1,  1,  1,  1,
28/*  WT */  0,  0,  0,  0,  0,  0,  0,  0,  0,
29/*  IW */  0,  1,  1,  0,  0,  0,  0,  1,  1,
30/*  IR */  0,  0,  1,  0,  0,  0,  0,  0,  1,
31/* RIW */  0,  1,  1,  0,  0,  0,  0,  1,  1,
32/*  DR */  0,  0,  1,  0,  1,  0,  1,  0,  0,
33/*  WW */  0,  1,  1,  0,  1,  1,  1,  0,  1
34};
35
/*
 * This conflict array is used for concurrent db access (CDB).  It covers
 * only the first five modes of the db_riw_conflicts layout (N, R, W, WT,
 * IW); the IW mode is used for write cursors.  Same convention as above:
 * row is the mode held, column is the mode requested, 1 means conflict.
 */
#define	DB_LOCK_CDB_N	5
static const u_int8_t db_cdb_conflicts[] = {
	/*		N	R	W	WT	IW */
	/*   N */	0,	0,	0,	0,	0,
	/*   R */	0,	0,	1,	0,	0,
	/*   W */	0,	1,	1,	1,	1,
	/*  WT */	0,	0,	0,	0,	0,
	/*  IW */	0,	0,	1,	0,	1
};
50
/*
 * __lock_open --
 *	Internal version of lock_open: only called from ENV->open.
 *
 *	Allocates the per-process DB_LOCKTAB handle, joins or creates the
 *	shared lock region, resolves the region-relative offsets into local
 *	pointers, and reconciles this process's deadlock-detector and
 *	timeout settings with the shared region's.  On success the handle
 *	is published in env->lk_handle; on failure all local state is torn
 *	down and the error is returned.
 *
 * PUBLIC: int __lock_open __P((ENV *, int));
 */
int
__lock_open(env, create_ok)
	ENV *env;
	int create_ok;
{
	DB_ENV *dbenv;
	DB_LOCKREGION *region;
	DB_LOCKTAB *lt;
	size_t size;
	int region_locked, ret;

	dbenv = env->dbenv;
	region_locked = 0;

	/* Create the lock table structure. */
	if ((ret = __os_calloc(env, 1, sizeof(DB_LOCKTAB), &lt)) != 0)
		return (ret);
	lt->env = env;

	/* Join/create the lock region. */
	lt->reginfo.env = env;
	lt->reginfo.type = REGION_TYPE_LOCK;
	lt->reginfo.id = INVALID_REGION_ID;
	lt->reginfo.flags = REGION_JOIN_OK;
	if (create_ok)
		F_SET(&lt->reginfo, REGION_CREATE_OK);

	/* Make sure there is at least one object and lock per partition. */
	if (dbenv->lk_max_objects < dbenv->lk_partitions)
		dbenv->lk_max_objects = dbenv->lk_partitions;
	if (dbenv->lk_max < dbenv->lk_partitions)
		dbenv->lk_max = dbenv->lk_partitions;
	size = __lock_region_size(env);
	if ((ret = __env_region_attach(env, &lt->reginfo, size)) != 0)
		goto err;

	/* If we created the region, initialize it. */
	if (F_ISSET(&lt->reginfo, REGION_CREATE))
		if ((ret = __lock_region_init(env, lt)) != 0)
			goto err;

	/*
	 * Set the local addresses: the primary structure's region-relative
	 * offset is stored in the region itself so joiners can find it.
	 */
	region = lt->reginfo.primary =
	    R_ADDR(&lt->reginfo, lt->reginfo.rp->primary);

	/* Set remaining pointers into region. */
	lt->conflicts = R_ADDR(&lt->reginfo, region->conf_off);
	lt->obj_tab = R_ADDR(&lt->reginfo, region->obj_off);
#ifdef HAVE_STATISTICS
	lt->obj_stat = R_ADDR(&lt->reginfo, region->stat_off);
#endif
	lt->part_array = R_ADDR(&lt->reginfo, region->part_off);
	lt->locker_tab = R_ADDR(&lt->reginfo, region->locker_off);

	env->lk_handle = lt;

	/* Serialize updates of the shared detector/timeout configuration. */
	LOCK_REGION_LOCK(env);
	region_locked = 1;

	if (dbenv->lk_detect != DB_LOCK_NORUN) {
		/*
		 * Check for incompatible automatic deadlock detection requests.
		 * There are scenarios where changing the detector configuration
		 * is reasonable, but we disallow them guessing it is likely to
		 * be an application error.
		 *
		 * We allow applications to turn on the lock detector, and we
		 * ignore attempts to set it to the default or current value.
		 */
		if (region->detect != DB_LOCK_NORUN &&
		    dbenv->lk_detect != DB_LOCK_DEFAULT &&
		    region->detect != dbenv->lk_detect) {
			__db_errx(env,
		    "lock_open: incompatible deadlock detector mode");
			ret = EINVAL;
			goto err;
		}
		if (region->detect == DB_LOCK_NORUN)
			region->detect = dbenv->lk_detect;
	}

	/*
	 * A process joining the region may have reset the lock and transaction
	 * timeouts.
	 */
	if (dbenv->lk_timeout != 0)
		region->lk_timeout = dbenv->lk_timeout;
	if (dbenv->tx_timeout != 0)
		region->tx_timeout = dbenv->tx_timeout;

	LOCK_REGION_UNLOCK(env);
	region_locked = 0;

	return (0);

	/*
	 * Error unwind: clear the published handle (harmless if it was never
	 * set -- it is only assigned after the region attach succeeded),
	 * release the region mutex if we still hold it, detach from the
	 * region if we attached, and free the local table.
	 */
err:	env->lk_handle = NULL;
	if (lt->reginfo.addr != NULL) {
		if (region_locked)
			LOCK_REGION_UNLOCK(env);
		(void)__env_region_detach(env, &lt->reginfo, 0);
	}

	__os_free(env, lt);
	return (ret);
}
162
/*
 * __lock_region_init --
 *	Initialize the lock region.
 *
 *	Called only by the process that created the region.  Carves the
 *	primary DB_LOCKREGION out of the shared region, selects a conflict
 *	matrix, sizes the object/locker hash tables, and pre-allocates all
 *	locks, lock objects and lockers onto free lists (locks and objects
 *	are distributed across the partitions, lockers are region-global).
 *
 *	NOTE: the sequence of __env_alloc calls here must stay in step with
 *	the size estimate in __lock_region_size.
 */
static int
__lock_region_init(env, lt)
	ENV *env;
	DB_LOCKTAB *lt;
{
	const u_int8_t *lk_conflicts;
	struct __db_lock *lp;
	DB_ENV *dbenv;
	DB_LOCKER *lidp;
	DB_LOCKOBJ *op;
	DB_LOCKREGION *region;
	DB_LOCKPART *part;
	u_int32_t extra_locks, extra_objects, i, j, max;
	u_int8_t *addr;
	int lk_modes, ret;

	dbenv = env->dbenv;

	/* Allocate and zero the primary region structure. */
	if ((ret = __env_alloc(&lt->reginfo,
	    sizeof(DB_LOCKREGION), &lt->reginfo.primary)) != 0)
		goto mem_err;
	lt->reginfo.rp->primary = R_OFFSET(&lt->reginfo, lt->reginfo.primary);
	region = lt->reginfo.primary;
	memset(region, 0, sizeof(*region));

	if ((ret = __mutex_alloc(
	    env, MTX_LOCK_REGION, 0, &region->mtx_region)) != 0)
		return (ret);

	/* Select a conflict matrix if none specified. */
	if (dbenv->lk_modes == 0)
		if (CDB_LOCKING(env)) {
			lk_modes = DB_LOCK_CDB_N;
			lk_conflicts = db_cdb_conflicts;
		} else {
			lk_modes = DB_LOCK_RIW_N;
			lk_conflicts = db_riw_conflicts;
		}
	else {
		lk_modes = dbenv->lk_modes;
		lk_conflicts = dbenv->lk_conflicts;
	}

	/*
	 * Seed the region header: detector off until a process requests it,
	 * timeouts copied from this environment, hash-table sizes rounded
	 * by __db_tablesize, and the maxima recorded in the stats.
	 */
	region->need_dd = 0;
	timespecclear(&region->next_timeout);
	region->detect = DB_LOCK_NORUN;
	region->lk_timeout = dbenv->lk_timeout;
	region->tx_timeout = dbenv->tx_timeout;
	region->locker_t_size = __db_tablesize(dbenv->lk_max_lockers);
	region->object_t_size = __db_tablesize(dbenv->lk_max_objects);
	region->part_t_size = dbenv->lk_partitions;
	memset(&region->stat, 0, sizeof(region->stat));
	region->stat.st_id = 0;
	region->stat.st_cur_maxid = DB_LOCK_MAXID;
	region->stat.st_maxlocks = dbenv->lk_max;
	region->stat.st_maxlockers = dbenv->lk_max_lockers;
	region->stat.st_maxobjects = dbenv->lk_max_objects;
	region->stat.st_partitions = dbenv->lk_partitions;
	region->stat.st_nmodes = lk_modes;

	/* Allocate room for the conflict matrix and initialize it. */
	if ((ret = __env_alloc(
	    &lt->reginfo, (size_t)(lk_modes * lk_modes), &addr)) != 0)
		goto mem_err;
	memcpy(addr, lk_conflicts, (size_t)(lk_modes * lk_modes));
	region->conf_off = R_OFFSET(&lt->reginfo, addr);

	/* Allocate room for the object hash table and initialize it. */
	if ((ret = __env_alloc(&lt->reginfo,
	    region->object_t_size * sizeof(DB_HASHTAB), &addr)) != 0)
		goto mem_err;
	__db_hashinit(addr, region->object_t_size);
	region->obj_off = R_OFFSET(&lt->reginfo, addr);

	/* Allocate room for the object hash stats table and initialize it. */
	if ((ret = __env_alloc(&lt->reginfo,
	    region->object_t_size * sizeof(DB_LOCK_HSTAT), &addr)) != 0)
		goto mem_err;
	memset(addr, 0, region->object_t_size * sizeof(DB_LOCK_HSTAT));
	region->stat_off = R_OFFSET(&lt->reginfo, addr);

	/* Allocate room for the partition table and initialize its mutexes. */
	if ((ret = __env_alloc(&lt->reginfo,
	    region->part_t_size * sizeof(DB_LOCKPART), &part)) != 0)
		goto mem_err;
	memset(part, 0, region->part_t_size * sizeof(DB_LOCKPART));
	region->part_off = R_OFFSET(&lt->reginfo, part);
	for (i = 0; i < region->part_t_size; i++) {
		if ((ret = __mutex_alloc(
		    env, MTX_LOCK_REGION, 0, &part[i].mtx_part)) != 0)
			return (ret);
	}
	if ((ret = __mutex_alloc(
	    env, MTX_LOCK_REGION, 0, &region->mtx_dd)) != 0)
		return (ret);

	if ((ret = __mutex_alloc(
	    env, MTX_LOCK_REGION, 0, &region->mtx_lockers)) != 0)
		return (ret);

	/* Allocate room for the locker hash table and initialize it. */
	if ((ret = __env_alloc(&lt->reginfo,
	    region->locker_t_size * sizeof(DB_HASHTAB), &addr)) != 0)
		goto mem_err;
	__db_hashinit(addr, region->locker_t_size);
	region->locker_off = R_OFFSET(&lt->reginfo, addr);

	SH_TAILQ_INIT(&region->dd_objs);

	/*
	 * If the locks and objects don't divide evenly, spread them around.
	 * The remainders are handed out one per partition until exhausted.
	 */
	extra_locks = region->stat.st_maxlocks -
	    ((region->stat.st_maxlocks / region->part_t_size) *
	    region->part_t_size);
	extra_objects = region->stat.st_maxobjects -
	    ((region->stat.st_maxobjects / region->part_t_size) *
	    region->part_t_size);
	for (j = 0; j < region->part_t_size; j++) {
		/* Initialize locks onto a free list. */
		SH_TAILQ_INIT(&part[j].free_locks);
		max = region->stat.st_maxlocks / region->part_t_size;
		if (extra_locks > 0) {
			max++;
			extra_locks--;
		}
		for (i = 0; i < max; ++i) {
			if ((ret = __env_alloc(&lt->reginfo,
			    sizeof(struct __db_lock), &lp)) != 0)
				goto mem_err;
			/* Lock mutexes are allocated lazily, on first use. */
			lp->mtx_lock = MUTEX_INVALID;
			lp->gen = 0;
			lp->status = DB_LSTAT_FREE;
			SH_TAILQ_INSERT_HEAD(
			    &part[j].free_locks, lp, links, __db_lock);
		}
		/* Initialize objects onto a free list.  */
		max = region->stat.st_maxobjects / region->part_t_size;
		if (extra_objects > 0) {
			max++;
			extra_objects--;
		}
		SH_TAILQ_INIT(&part[j].free_objs);
		for (i = 0; i < max; ++i) {
			if ((ret = __env_alloc(&lt->reginfo,
			    sizeof(DB_LOCKOBJ), &op)) != 0)
				goto mem_err;
			SH_TAILQ_INSERT_HEAD(
			    &part[j].free_objs, op, links, __db_lockobj);
			op->generation = 0;
		}
	}

	/* Initialize lockers onto a free list.  */
	SH_TAILQ_INIT(&region->lockers);
	SH_TAILQ_INIT(&region->free_lockers);
	for (i = 0; i < region->stat.st_maxlockers; ++i) {
		if ((ret =
		    __env_alloc(&lt->reginfo, sizeof(DB_LOCKER), &lidp)) != 0) {
			/*
			 * Shared error exit for every allocation above: the
			 * earlier "goto mem_err"s jump into this loop body,
			 * which is legal C -- only the statements at the
			 * label run, the loop is not (re)entered.  Region
			 * teardown reclaims any partial allocations.
			 */
mem_err:		__db_errx(env,
			    "unable to allocate memory for the lock table");
			return (ret);
		}
		SH_TAILQ_INSERT_HEAD(
		    &region->free_lockers, lidp, links, __db_locker);
	}

	return (0);
}
336
/*
 * __lock_env_refresh --
 *	Clean up after the lock system on a close or failed open.
 *
 *	For private (heap-backed) environments, individually frees every
 *	structure __lock_region_init allocated -- conflict matrix, hash
 *	tables, partition array, and all free-listed locks, objects and
 *	lockers -- then detaches from the region and discards the local
 *	DB_LOCKTAB handle.
 *
 *	NOTE(review): only the free lists are walked here; presumably any
 *	in-use locks/objects/lockers have been returned to the free lists
 *	by earlier shutdown stages -- confirm against the callers.
 *
 * PUBLIC: int __lock_env_refresh __P((ENV *));
 */
int
__lock_env_refresh(env)
	ENV *env;
{
	struct __db_lock *lp;
	DB_LOCKER *locker;
	DB_LOCKOBJ *lockobj;
	DB_LOCKREGION *lr;
	DB_LOCKTAB *lt;
	REGINFO *reginfo;
	u_int32_t j;
	int ret;

	lt = env->lk_handle;
	reginfo = &lt->reginfo;
	lr = reginfo->primary;

	/*
	 * If a private region, return the memory to the heap.  Not needed for
	 * filesystem-backed or system shared memory regions, that memory isn't
	 * owned by any particular process.
	 */
	if (F_ISSET(env, ENV_PRIVATE)) {
		/* Discard the conflict matrix. */
		__env_alloc_free(reginfo, R_ADDR(reginfo, lr->conf_off));

		/* Discard the object hash table. */
		__env_alloc_free(reginfo, R_ADDR(reginfo, lr->obj_off));

		/* Discard the locker hash table. */
		__env_alloc_free(reginfo, R_ADDR(reginfo, lr->locker_off));

		/* Discard the object hash stat table. */
		__env_alloc_free(reginfo, R_ADDR(reginfo, lr->stat_off));

		for (j = 0; j < lr->part_t_size; j++) {
			/* Discard locks. */
			while ((lp = SH_TAILQ_FIRST(
			    &FREE_LOCKS(lt, j), __db_lock)) != NULL) {
				SH_TAILQ_REMOVE(&FREE_LOCKS(lt, j),
				     lp, links, __db_lock);
				__env_alloc_free(reginfo, lp);
			}

			/* Discard objects. */
			while ((lockobj = SH_TAILQ_FIRST(
			    &FREE_OBJS(lt, j), __db_lockobj)) != NULL) {
				SH_TAILQ_REMOVE(&FREE_OBJS(lt, j),
				     lockobj, links, __db_lockobj);
				__env_alloc_free(reginfo, lockobj);
			}
		}

		/* Discard the object partition array. */
		__env_alloc_free(reginfo, R_ADDR(reginfo, lr->part_off));

		/* Discard lockers. */
		while ((locker =
		    SH_TAILQ_FIRST(&lr->free_lockers, __db_locker)) != NULL) {
			SH_TAILQ_REMOVE(
			    &lr->free_lockers, locker, links, __db_locker);
			__env_alloc_free(reginfo, locker);
		}
	}

	/* Detach from the region. */
	ret = __env_region_detach(env, reginfo, 0);

	/* Discard DB_LOCKTAB. */
	__os_free(env, lt);
	env->lk_handle = NULL;

	return (ret);
}
417
418/*
419 * __lock_region_mutex_count --
420 *	Return the number of mutexes the lock region will need.
421 *
422 * PUBLIC: u_int32_t __lock_region_mutex_count __P((ENV *));
423 */
424u_int32_t
425__lock_region_mutex_count(env)
426	ENV *env;
427{
428	DB_ENV *dbenv;
429
430	dbenv = env->dbenv;
431
432	return (dbenv->lk_max + dbenv->lk_partitions + 3);
433}
434
435/*
436 * __lock_region_size --
437 *	Return the region size.
438 */
439static size_t
440__lock_region_size(env)
441	ENV *env;
442{
443	DB_ENV *dbenv;
444	size_t retval;
445
446	dbenv = env->dbenv;
447
448	/*
449	 * Figure out how much space we're going to need.  This list should
450	 * map one-to-one with the __env_alloc calls in __lock_region_init.
451	 */
452	retval = 0;
453	retval += __env_alloc_size(sizeof(DB_LOCKREGION));
454	retval += __env_alloc_size((size_t)(dbenv->lk_modes * dbenv->lk_modes));
455	retval += __env_alloc_size(
456	    __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_HASHTAB)));
457	retval += __env_alloc_size(
458	    __db_tablesize(dbenv->lk_max_lockers) * (sizeof(DB_HASHTAB)));
459	retval += __env_alloc_size(
460	    __db_tablesize(dbenv->lk_max_objects) * (sizeof(DB_LOCK_HSTAT)));
461	retval +=
462	    __env_alloc_size(dbenv->lk_partitions * (sizeof(DB_LOCKPART)));
463	retval += __env_alloc_size(sizeof(struct __db_lock)) * dbenv->lk_max;
464	retval += __env_alloc_size(sizeof(DB_LOCKOBJ)) * dbenv->lk_max_objects;
465	retval += __env_alloc_size(sizeof(DB_LOCKER)) * dbenv->lk_max_lockers;
466
467	/*
468	 * Include 16 bytes of string space per lock.  DB doesn't use it
469	 * because we pre-allocate lock space for DBTs in the structure.
470	 */
471	retval += __env_alloc_size(dbenv->lk_max * 16);
472
473	/* And we keep getting this wrong, let's be generous. */
474	retval += retval / 4;
475
476	return (retval);
477}
478