1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: lock.h,v 12.23 2008/05/07 12:27:33 bschmeck Exp $
7 */
8
9#ifndef	_DB_LOCK_H_
10#define	_DB_LOCK_H_
11
12#if defined(__cplusplus)
13extern "C" {
14#endif
15
/* Default number of locks in the lock region. */
#define	DB_LOCK_DEFAULT_N	1000
17
/*
 * The locker id space is divided between the transaction manager and the lock
 * manager.  Lock IDs start at 1 and go to DB_LOCK_MAXID.  Txn IDs start at
 * DB_LOCK_MAXID + 1 and go up to TXN_MAXIMUM.
 *
 * DB_LOCK_INVALIDID (0) lies below the first lock ID, so it falls outside
 * both of the ranges described above.
 */
#define	DB_LOCK_INVALIDID	0
#define	DB_LOCK_MAXID		0x7fffffff	/* Last lock manager ID. */
25
/*
 * An unset lock is marked with an out-of-band value.  A lock stores an offset
 * into the lock region, so an invalid region offset doubles as the "invalid
 * or unset lock" marker.
 *
 * LOCK_INIT stores the marker into the lock's offset; LOCK_ISSET is non-zero
 * when the lock's offset differs from the marker.
 */
#define	LOCK_INVALID		INVALID_ROFF
#define	LOCK_INIT(lock)		((lock).off = LOCK_INVALID)
#define	LOCK_ISSET(lock)	(LOCK_INVALID != (lock).off)
33
/*
 * IS_WRITELOCK --
 *	Non-zero when lock mode m is one of the modes counted as a write lock
 *	for the NUMWRITES option to deadlock detection.  The argument is
 *	evaluated more than once, so it must be free of side effects.
 */
#define	IS_WRITELOCK(m)							\
	((m) == DB_LOCK_WRITE ||					\
	 (m) == DB_LOCK_WWRITE ||					\
	 (m) == DB_LOCK_IWRITE ||					\
	 (m) == DB_LOCK_IWR)
41
/*
 * Lock/unlock the lock region as a whole by taking or releasing the region
 * mutex, which is reached through the environment's lock handle.  Mostly
 * used for initialization.
 */
#define	LOCK_REGION_LOCK(env)						\
	MUTEX_LOCK(env,							\
	    ((DB_LOCKREGION *)(env)->lk_handle->reginfo.primary)->mtx_region)
#define	LOCK_REGION_UNLOCK(env)						\
	MUTEX_UNLOCK(env,						\
	    ((DB_LOCKREGION *)(env)->lk_handle->reginfo.primary)->mtx_region)
52
/*
 * DB_LOCKREGION --
 *	The lock shared region: the mutexes, queue heads, timeouts, table
 *	sizes and region offsets that describe the lock table.
 */

typedef struct __db_lockregion {
	db_mutex_t	mtx_region;	/* Region mutex. */

	u_int32_t	need_dd;	/* Flag for the deadlock detector. */
	u_int32_t	detect;		/* Run the deadlock detector on every
					   conflict. */
	db_timespec	next_timeout;	/* Next time to expire a lock. */
	db_mutex_t	mtx_dd;		/* Mutex for the lock object deadlock
					   detector list. */
	db_mutex_t	mtx_lockers;	/* Mutex for locker allocation. */
	SH_TAILQ_HEAD(__dobj) dd_objs;	/* Objects with waiters. */
					/* Free locker header. */
	SH_TAILQ_HEAD(__flocker) free_lockers;
	SH_TAILQ_HEAD(__lkrs) lockers;	/* List of lockers. */

	db_timeout_t	lk_timeout;	/* Timeout for locks. */
	db_timeout_t	tx_timeout;	/* Timeout for txns. */

	u_int32_t	locker_t_size;	/* Size of locker hash table; the
					   modulus used by LOCKER_HASH. */
	u_int32_t	object_t_size;	/* Size of object hash table; the
					   modulus used by OBJECT_LOCK. */
	u_int32_t	part_t_size;	/* Number of partitions; the modulus
					   used by LOCK_PART. */

	roff_t		conf_off;	/* Offset of conflicts array. */
	roff_t		obj_off;	/* Offset of object hash table. */
	roff_t		part_off;	/* Offset of partition array. */
	roff_t		stat_off;	/* Offset to object hash stats. */
	roff_t		locker_off;	/* Offset of locker hash table. */

	DB_LOCK_STAT	stat;		/* Stats about locking; CONFLICTS
					   reads stat.st_nmodes. */
} DB_LOCKREGION;
86
/*
 * Since we will store DBTs in shared memory, we need the equivalent of a
 * DBT that will work in shared memory.  An SH_DBT records a byte length and
 * an offset; SH_DBT_PTR turns the offset into an address by adding it to the
 * address of the SH_DBT itself.
 */
typedef struct __sh_dbt {
	u_int32_t size;			/* Byte length. */
	roff_t    off;			/* Region offset. */
} SH_DBT;
95
/*
 * SH_DBT_PTR --
 *	Address of the data described by the SH_DBT that p points to: the
 *	SH_DBT's own address advanced by its off field, counted in bytes.
 */
#define	SH_DBT_PTR(p)	((void *)((u_int8_t *)(p) + (p)->off))
97
/*
 * Object structures;  these live in the object hash table.  Each one
 * identifies a locked object and heads the lists of locks held on it and
 * locks waiting for it.
 */
typedef struct __db_lockobj {
	u_int32_t	indx;		/* Hash index of this object. */
	u_int32_t	generation;	/* Generation of this object. */
	SH_DBT	lockobj;		/* Identifies object locked. */
	SH_TAILQ_ENTRY links;		/* Links for free list or hash list. */
	SH_TAILQ_ENTRY dd_links;	/* Links for deadlock detector list. */
	SH_TAILQ_HEAD(__waitl) waiters;	/* List of waiting locks. */
	SH_TAILQ_HEAD(__holdl) holders;	/* List of held locks. */
					/* Declare room in the object to hold
					 * typical DB lock structures so that
					 * we do not have to allocate them from
					 * shalloc at run-time.  The room is
					 * sized to one struct __db_ilock. */
	u_int8_t objdata[sizeof(struct __db_ilock)];
} DB_LOCKOBJ;
115
/*
 * Locker structures; these live in the locker hash table.  A locker records
 * who owns a locker ID, the locks it holds, its place in a transaction
 * family, and its timeouts.
 */
struct __db_locker {
	u_int32_t id;			/* Locker id. */

	pid_t pid;			/* Process owning locker ID. */
	db_threadid_t tid;		/* Thread owning locker ID. */

	u_int32_t dd_id;		/* Deadlock detector id. */

	u_int32_t nlocks;		/* Number of locks held. */
	u_int32_t nwrites;		/* Number of write locks held. */

	roff_t  master_locker;		/* Locker of master transaction. */
	roff_t  parent_locker;		/* Parent of this child. */
	SH_LIST_HEAD(_child) child_locker;	/* List of descendant txns;
						   only used in a "master"
						   txn. */
	SH_LIST_ENTRY child_link;	/* Links transactions in the family;
					   elements of the child_locker
					   list. */
	SH_TAILQ_ENTRY links;		/* Links for free and hash list. */
	SH_TAILQ_ENTRY ulinks;		/* Links for the in-use list. */
	SH_LIST_HEAD(_held) heldby;	/* Locks held by this locker. */
	db_timespec	lk_expire;	/* When current lock expires. */
	db_timespec	tx_expire;	/* When this txn expires. */
	db_timeout_t	lk_timeout;	/* How long do we let locks live. */

	/* Bit values stored in the flags field below. */
#define	DB_LOCKER_DIRTY		0x0001
#define	DB_LOCKER_INABORT	0x0002
#define	DB_LOCKER_TIMEOUT	0x0004
	u_int32_t flags;
};
150
/*
 * LOCK_PART --
 *	Map a hash index into a partition: the index modulo the number of
 *	partitions recorded in the region.
 *
 *	reg	pointer to the lock region (read for part_t_size).
 *	ndx	hash index; may be any integer expression.
 *
 * The index is parenthesized so that an expression argument such as "a + b"
 * is reduced as a whole rather than having only its last operand taken
 * modulo the partition count.
 */
#define	LOCK_PART(reg, ndx)	((ndx) % (reg)->part_t_size)
155
/*
 * Structure that contains information about a lock table partition: the
 * partition's mutex and its free lists of locks and objects.
 */
typedef struct __db_lockpart{
	db_mutex_t	mtx_part;	/* Mutex for partition. */
					/* Free lock header. */
	SH_TAILQ_HEAD(__flock) free_locks;
					/* Free object header. */
	SH_TAILQ_HEAD(__fobj) free_objs;
#ifdef HAVE_STATISTICS
	DB_LOCK_PSTAT	part_stat;	/* Partition stats. */
#endif
} DB_LOCKPART;
169
/*
 * Free-list heads of partition "part" in lock table lt: FREE_LOCKS names the
 * partition's free lock list, FREE_OBJS its free object list.
 */
#define	FREE_LOCKS(lt, part)	((lt)->part_array[(part)].free_locks)
#define	FREE_OBJS(lt, part)	((lt)->part_array[(part)].free_objs)
172
/*
 * DB_LOCKTAB --
 *	The primary library lock data structure (i.e., the one referenced
 * by the environment, as opposed to the internal one laid out in the region.)
 * It holds the region information plus pointers to the conflict matrix, the
 * partition array and the hash tables.
 */
struct __db_locktab {
	ENV		*env;		/* Environment. */
	REGINFO		 reginfo;	/* Region information. */
	u_int8_t	*conflicts;	/* Pointer to conflict matrix;
					   indexed by CONFLICTS. */
	DB_LOCKPART	*part_array;	/* Beginning of partition array. */
#ifdef HAVE_STATISTICS
	DB_LOCK_HSTAT	*obj_stat;	/* Object hash stats array. */
#endif
	DB_HASHTAB	*obj_tab;	/* Beginning of object hash table. */
	DB_HASHTAB	*locker_tab;	/* Beginning of locker hash table. */
};
189
/*
 * CONFLICTS --
 *	Test for conflicts: the conflict-matrix entry for a lock held in mode
 *	HELD against a request for mode WANTED.  The matrix is a flat array
 *	indexed as HELD * st_nmodes + WANTED.
 *
 *	T	pointer to the lock table (read for conflicts).
 *	R	pointer to the lock region (read for stat.st_nmodes).
 *	HELD	mode of the lock already held.
 *	WANTED	mode being requested.
 *
 * HELD and WANTED are usually db_lockmode_t enums, so each is cast to int.
 * The arguments are parenthesized before the cast so that an expression
 * argument is converted as a whole instead of only its first operand, and
 * the expansion itself is parenthesized so it behaves as a single operand.
 */
#define	CONFLICTS(T, R, HELD, WANTED)					\
	((T)->conflicts[						\
	    (int)(HELD) * (R)->stat.st_nmodes + (int)(WANTED)])
197
/* Non-zero unless the object's links.stqe_prev holds the value -1. */
#define	OBJ_LINKS_VALID(L)	(-1 != (L)->links.stqe_prev)
199
/*
 * A single lock: who holds it, which object it is on, its mode and status,
 * and the list linkage that ties it to its object and to its locker.
 */
struct __db_lock {
	/*
	 * Wait on mutex to wait on lock.  You reference your own mutex with
	 * ID 0 and others reference your mutex with ID 1.
	 */
	db_mutex_t	mtx_lock;

	roff_t		holder;		/* Who holds this lock. */
	u_int32_t	gen;		/* Generation count. */
	SH_TAILQ_ENTRY	links;		/* Free or holder/waiter list. */
	SH_LIST_ENTRY	locker_links;	/* List of locks held by a locker. */
	u_int32_t	refcount;	/* Reference count of the lock. */
	db_lockmode_t	mode;		/* What sort of lock. */
	roff_t		obj;		/* Relative offset of object struct. */
	u_int32_t	indx;		/* Hash index of this object. */
	db_status_t	status;		/* Status of this lock. */
};
217
/*
 * Flag values for __lock_put_internal:
 * DB_LOCK_DOALL:     Unlock all references in this lock (instead of only 1).
 * DB_LOCK_FREE:      Free the lock (used in checklocker).
 * DB_LOCK_NOPROMOTE: Don't bother running promotion when releasing locks
 *		      (used by __lock_put_internal).
 * DB_LOCK_UNLINK:    Remove from the locker links (used in checklocker).
 * DB_LOCK_NOWAITERS: NOTE(review): defined below but never described here;
 *		      confirm its meaning against __lock_put_internal and its
 *		      callers before relying on it.
 * Make sure that these do not conflict with the interface flags because
 * we pass some of those around.
 */
#define	DB_LOCK_DOALL		0x010000
#define	DB_LOCK_FREE		0x040000
#define	DB_LOCK_NOPROMOTE	0x080000
#define	DB_LOCK_UNLINK		0x100000
#define	DB_LOCK_NOWAITERS	0x400000
233
234/*
235 * Macros to get/release different types of mutexes.
236 */
/*
 * Operations on lock objects must be protected by a mutex, either on their
 * partition or on the lock region.  Lock structures associated with that
 * object are protected as well.  Each partition has a free list of objects
 * and lock structures protected by that mutex.  We want to avoid getting
 * multiple mutexes, particularly in __lock_vec, when there is only a
 * single partition.  If there is only one partition, then LOCK_SYSTEM_LOCK
 * and LOCK_SYSTEM_UNLOCK acquire and release a lock-system-wide mutex, and
 * MUTEX_LOCK_PARTITION and MUTEX_UNLOCK_PARTITION are no-ops.  If the number
 * of partitions is greater than one, then LOCK_SYSTEM_LOCK and
 * LOCK_SYSTEM_UNLOCK are no-ops, and MUTEX_LOCK_PARTITION and
 * MUTEX_UNLOCK_PARTITION acquire and release the mutex on a particular
 * partition of the lock table.
 */
/* Take the region mutex, but only when the table has exactly one partition. */
#define	LOCK_SYSTEM_LOCK(lt, reg) do {					\
	if ((reg)->part_t_size == 1) {					\
		MUTEX_LOCK((lt)->env, (reg)->mtx_region);		\
	}								\
} while (0)

/* Release the region mutex, but only when there is exactly one partition. */
#define	LOCK_SYSTEM_UNLOCK(lt, reg) do {				\
	if ((reg)->part_t_size == 1) {					\
		MUTEX_UNLOCK((lt)->env, (reg)->mtx_region);		\
	}								\
} while (0)

/* Take the mutex of partition p, unless the table has exactly one partition. */
#define	MUTEX_LOCK_PARTITION(lt, reg, p) do {				\
	if ((reg)->part_t_size != 1) {					\
		MUTEX_LOCK((lt)->env, (lt)->part_array[p].mtx_part);	\
	}								\
} while (0)

/* Release the mutex of partition p, unless there is exactly one partition. */
#define	MUTEX_UNLOCK_PARTITION(lt, reg, p) do {				\
	if ((reg)->part_t_size != 1) {					\
		MUTEX_UNLOCK((lt)->env, (lt)->part_array[p].mtx_part);	\
	}								\
} while (0)
266
/*
 * OBJECT_LOCK --
 *	Store the object's hash bucket (its __lock_ohash value modulo the
 *	object table size) in ndx, then lock the partition that bucket maps
 *	to.  ndx must be an lvalue; it is written before the lock is taken.
 */
#define	OBJECT_LOCK(lt, reg, obj, ndx) do {				\
	(ndx) = __lock_ohash(obj) % (reg)->object_t_size;		\
	MUTEX_LOCK_PARTITION((lt), (reg), LOCK_PART((reg), (ndx)));	\
} while (0)
271
/*
 * OBJECT_LOCK_NDX / OBJECT_UNLOCK --
 *	Lock or unlock the partition that hash index ndx maps to.
 *
 *	lt	pointer to the lock table.
 *	reg	pointer to the lock region.
 *	ndx	hash index of the object.
 *
 * Neither macro ends in a semicolon; the caller supplies it.  A semicolon
 * inside the expansion would leave an extra empty statement after each call,
 * which breaks use as the unbraced body of an if that has an else.
 */
#define	OBJECT_LOCK_NDX(lt, reg, ndx)					\
	MUTEX_LOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx))

#define	OBJECT_UNLOCK(lt, reg, ndx)					\
	MUTEX_UNLOCK_PARTITION(lt, reg, LOCK_PART(reg, ndx))
277
/*
 * LOCK_DD/UNLOCK_DD take and release the region's mtx_dd mutex;
 * LOCK_LOCKERS/UNLOCK_LOCKERS take and release its mtx_lockers mutex.
 * Together they protect the object deadlock detector queue and the locker
 * allocation and active queues.
 */
#define	LOCK_DD(env, region)		MUTEX_LOCK(env, (region)->mtx_dd)
#define	UNLOCK_DD(env, region)		MUTEX_UNLOCK(env, (region)->mtx_dd)
#define	LOCK_LOCKERS(env, region)	MUTEX_LOCK(env, (region)->mtx_lockers)
#define	UNLOCK_LOCKERS(env, region)	MUTEX_UNLOCK(env, (region)->mtx_lockers)
290
/*
 * __lock_locker_hash --
 *	Hash function for entering lockers into the locker hash table.
 *	Since these are simply 32-bit unsigned integers at the moment,
 *	just return the locker value.
 *
 * LOCKER_HASH --
 *	Store the locker's bucket (its hash modulo the locker table size) in
 *	ndx.
 *
 *	lt	pointer to the lock table (not used by the expansion).
 *	reg	pointer to the lock region (read for locker_t_size).
 *	locker	locker id to hash.
 *	ndx	lvalue that receives the bucket index.
 *
 * The macro is a single do/while(0) statement with no trailing semicolon;
 * the caller supplies it.  A semicolon inside the expansion would leave an
 * extra empty statement after each call, which breaks use as the unbraced
 * body of an if that has an else.
 */
#define	__lock_locker_hash(locker)	(locker)
#define	LOCKER_HASH(lt, reg, locker, ndx) do {				\
	(ndx) = __lock_locker_hash(locker) % (reg)->locker_t_size;	\
} while (0)
300
301#if defined(__cplusplus)
302}
303#endif
304
305#include "dbinc_auto/lock_ext.h"
306#endif /* !_DB_LOCK_H_ */
307