1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: mut_region.c,v 12.30 2008/01/08 20:58:43 bostic Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/log.h"
13#include "dbinc/lock.h"
14#include "dbinc/mp.h"
15#include "dbinc/txn.h"
16#include "dbinc/mutex_int.h"
17
18static size_t __mutex_align_size __P((ENV *));
19static int __mutex_region_init __P((ENV *, DB_MUTEXMGR *));
20static size_t __mutex_region_size __P((ENV *));
21
22/*
23 * __mutex_open --
24 *	Open a mutex region.
25 *
26 * PUBLIC: int __mutex_open __P((ENV *, int));
27 */
28int
29__mutex_open(env, create_ok)
30	ENV *env;
31	int create_ok;
32{
33	DB_ENV *dbenv;
34	DB_MUTEXMGR *mtxmgr;
35	DB_MUTEXREGION *mtxregion;
36	db_mutex_t mutex;
37	u_int32_t cpu_count;
38	u_int i;
39	int ret;
40
41	dbenv = env->dbenv;
42
43	/*
44	 * Initialize the ENV handle information if not already initialized.
45	 *
46	 * Align mutexes on the byte boundaries specified by the application.
47	 */
48	if (dbenv->mutex_align == 0)
49		dbenv->mutex_align = MUTEX_ALIGN;
50	if (dbenv->mutex_tas_spins == 0) {
51		cpu_count = __os_cpu_count();
52		if ((ret = __mutex_set_tas_spins(dbenv, cpu_count == 1 ?
53		    cpu_count : cpu_count * MUTEX_SPINS_PER_PROCESSOR)) != 0)
54			return (ret);
55	}
56
57	/*
58	 * If the user didn't set an absolute value on the number of mutexes
59	 * we'll need, figure it out.  We're conservative in our allocation,
60	 * we need mutexes for DB handles, group-commit queues and other things
61	 * applications allocate at run-time.  The application may have kicked
62	 * up our count to allocate its own mutexes, add that in.
63	 */
64	if (dbenv->mutex_cnt == 0)
65		dbenv->mutex_cnt =
66		    __lock_region_mutex_count(env) +
67		    __log_region_mutex_count(env) +
68		    __memp_region_mutex_count(env) +
69		    __txn_region_mutex_count(env) +
70		    dbenv->mutex_inc + 100;
71
72	/* Create/initialize the mutex manager structure. */
73	if ((ret = __os_calloc(env, 1, sizeof(DB_MUTEXMGR), &mtxmgr)) != 0)
74		return (ret);
75
76	/* Join/create the mutex region. */
77	mtxmgr->reginfo.env = env;
78	mtxmgr->reginfo.type = REGION_TYPE_MUTEX;
79	mtxmgr->reginfo.id = INVALID_REGION_ID;
80	mtxmgr->reginfo.flags = REGION_JOIN_OK;
81	if (create_ok)
82		F_SET(&mtxmgr->reginfo, REGION_CREATE_OK);
83	if ((ret = __env_region_attach(env,
84	    &mtxmgr->reginfo, __mutex_region_size(env))) != 0)
85		goto err;
86
87	/* If we created the region, initialize it. */
88	if (F_ISSET(&mtxmgr->reginfo, REGION_CREATE))
89		if ((ret = __mutex_region_init(env, mtxmgr)) != 0)
90			goto err;
91
92	/* Set the local addresses. */
93	mtxregion = mtxmgr->reginfo.primary =
94	    R_ADDR(&mtxmgr->reginfo, mtxmgr->reginfo.rp->primary);
95	mtxmgr->mutex_array = R_ADDR(&mtxmgr->reginfo, mtxregion->mutex_off);
96
97	env->mutex_handle = mtxmgr;
98
99	/* Allocate initial queue of mutexes. */
100	if (env->mutex_iq != NULL) {
101		DB_ASSERT(env, F_ISSET(&mtxmgr->reginfo, REGION_CREATE));
102		for (i = 0; i < env->mutex_iq_next; ++i) {
103			if ((ret = __mutex_alloc_int(
104			    env, 0, env->mutex_iq[i].alloc_id,
105			    env->mutex_iq[i].flags, &mutex)) != 0)
106				goto err;
107			/*
108			 * Confirm we allocated the right index, correcting
109			 * for avoiding slot 0 (MUTEX_INVALID).
110			 */
111			DB_ASSERT(env, mutex == i + 1);
112		}
113		__os_free(env, env->mutex_iq);
114		env->mutex_iq = NULL;
115
116		/*
117		 * This is the first place we can test mutexes and we need to
118		 * know if they're working.  (They CAN fail, for example on
119		 * SunOS, when using fcntl(2) for locking and using an
120		 * in-memory filesystem as the database environment directory.
121		 * But you knew that, I'm sure -- it probably wasn't worth
122		 * mentioning.)
123		 */
124		mutex = MUTEX_INVALID;
125		if ((ret =
126		    __mutex_alloc(env, MTX_MUTEX_TEST, 0, &mutex) != 0) ||
127		    (ret = __mutex_lock(env, mutex)) != 0 ||
128		    (ret = __mutex_unlock(env, mutex)) != 0 ||
129		    (ret = __mutex_free(env, &mutex)) != 0) {
130			__db_errx(env,
131		    "Unable to acquire/release a mutex; check configuration");
132			goto err;
133		}
134	}
135
136	return (0);
137
138err:	env->mutex_handle = NULL;
139	if (mtxmgr->reginfo.addr != NULL)
140		(void)__env_region_detach(env, &mtxmgr->reginfo, 0);
141
142	__os_free(env, mtxmgr);
143	return (ret);
144}
145
146/*
147 * __mutex_region_init --
148 *	Initialize a mutex region in shared memory.
149 */
150static int
151__mutex_region_init(env, mtxmgr)
152	ENV *env;
153	DB_MUTEXMGR *mtxmgr;
154{
155	DB_ENV *dbenv;
156	DB_MUTEX *mutexp;
157	DB_MUTEXREGION *mtxregion;
158	db_mutex_t i;
159	int ret;
160	void *mutex_array;
161
162	dbenv = env->dbenv;
163
164	COMPQUIET(mutexp, NULL);
165
166	if ((ret = __env_alloc(&mtxmgr->reginfo,
167	    sizeof(DB_MUTEXREGION), &mtxmgr->reginfo.primary)) != 0) {
168		__db_errx(env,
169		    "Unable to allocate memory for the mutex region");
170		return (ret);
171	}
172	mtxmgr->reginfo.rp->primary =
173	    R_OFFSET(&mtxmgr->reginfo, mtxmgr->reginfo.primary);
174	mtxregion = mtxmgr->reginfo.primary;
175	memset(mtxregion, 0, sizeof(*mtxregion));
176
177	if ((ret = __mutex_alloc(
178	    env, MTX_MUTEX_REGION, 0, &mtxregion->mtx_region)) != 0)
179		return (ret);
180
181	mtxregion->mutex_size = __mutex_align_size(env);
182
183	mtxregion->stat.st_mutex_align = dbenv->mutex_align;
184	mtxregion->stat.st_mutex_cnt = dbenv->mutex_cnt;
185	mtxregion->stat.st_mutex_tas_spins = dbenv->mutex_tas_spins;
186
187	/*
188	 * Get a chunk of memory to be used for the mutexes themselves.  Each
189	 * piece of the memory must be properly aligned, and that alignment
190	 * may be more restrictive than the memory alignment returned by the
191	 * underlying allocation code.  We already know how much memory each
192	 * mutex in the array will take up, but we need to offset the first
193	 * mutex in the array so the array begins properly aligned.
194	 *
195	 * The OOB mutex (MUTEX_INVALID) is 0.  To make this work, we ignore
196	 * the first allocated slot when we build the free list.  We have to
197	 * correct the count by 1 here, though, otherwise our counter will be
198	 * off by 1.
199	 */
200	if ((ret = __env_alloc(&mtxmgr->reginfo,
201	    mtxregion->stat.st_mutex_align +
202	    (mtxregion->stat.st_mutex_cnt + 1) * mtxregion->mutex_size,
203	    &mutex_array)) != 0) {
204		__db_errx(env,
205		    "Unable to allocate memory for mutexes from the region");
206		return (ret);
207	}
208
209	mtxregion->mutex_off_alloc = R_OFFSET(&mtxmgr->reginfo, mutex_array);
210	mutex_array = ALIGNP_INC(mutex_array, mtxregion->stat.st_mutex_align);
211	mtxregion->mutex_off = R_OFFSET(&mtxmgr->reginfo, mutex_array);
212	mtxmgr->mutex_array = mutex_array;
213
214	/*
215	 * Put the mutexes on a free list and clear the allocated flag.
216	 *
217	 * The OOB mutex (MUTEX_INVALID) is 0, skip it.
218	 *
219	 * The comparison is <, not <=, because we're looking ahead one
220	 * in each link.
221	 */
222	for (i = 1; i < mtxregion->stat.st_mutex_cnt; ++i) {
223		mutexp = MUTEXP_SET(i);
224		mutexp->flags = 0;
225		mutexp->mutex_next_link = i + 1;
226	}
227	mutexp = MUTEXP_SET(i);
228	mutexp->flags = 0;
229	mutexp->mutex_next_link = MUTEX_INVALID;
230	mtxregion->mutex_next = 1;
231	mtxregion->stat.st_mutex_free = mtxregion->stat.st_mutex_cnt;
232	mtxregion->stat.st_mutex_inuse = mtxregion->stat.st_mutex_inuse_max = 0;
233
234	return (0);
235}
236
237/*
238 * __mutex_env_refresh --
239 *	Clean up after the mutex region on a close or failed open.
240 *
241 * PUBLIC: int __mutex_env_refresh __P((ENV *));
242 */
243int
244__mutex_env_refresh(env)
245	ENV *env;
246{
247	DB_MUTEXMGR *mtxmgr;
248	DB_MUTEXREGION *mtxregion;
249	REGINFO *reginfo;
250	int ret;
251
252	mtxmgr = env->mutex_handle;
253	reginfo = &mtxmgr->reginfo;
254	mtxregion = mtxmgr->reginfo.primary;
255
256	/*
257	 * If a private region, return the memory to the heap.  Not needed for
258	 * filesystem-backed or system shared memory regions, that memory isn't
259	 * owned by any particular process.
260	 */
261	if (F_ISSET(env, ENV_PRIVATE)) {
262#ifdef HAVE_MUTEX_SYSTEM_RESOURCES
263		/*
264		 * If destroying the mutex region, return any system resources
265		 * to the system.
266		 */
267		__mutex_resource_return(env, reginfo);
268#endif
269		/* Discard the mutex array. */
270		__env_alloc_free(
271		    reginfo, R_ADDR(reginfo, mtxregion->mutex_off_alloc));
272	}
273
274	/* Detach from the region. */
275	ret = __env_region_detach(env, reginfo, 0);
276
277	__os_free(env, mtxmgr);
278
279	env->mutex_handle = NULL;
280
281	return (ret);
282}
283
284/*
285 * __mutex_align_size --
286 *	Return how much memory each mutex will take up if an array of them
287 *	are to be properly aligned, individually, within the array.
288 */
289static size_t
290__mutex_align_size(env)
291	ENV *env;
292{
293	DB_ENV *dbenv;
294
295	dbenv = env->dbenv;
296
297	return ((size_t)DB_ALIGN(sizeof(DB_MUTEX), dbenv->mutex_align));
298}
299
300/*
301 * __mutex_region_size --
302 *	 Return the amount of space needed for the mutex region.
303 */
304static size_t
305__mutex_region_size(env)
306	ENV *env;
307{
308	DB_ENV *dbenv;
309	size_t s;
310
311	dbenv = env->dbenv;
312
313	s = sizeof(DB_MUTEXMGR) + 1024;
314
315	/* We discard one mutex for the OOB slot. */
316	s += __env_alloc_size(
317	    (dbenv->mutex_cnt + 1) *__mutex_align_size(env));
318
319	return (s);
320}
321
#ifdef	HAVE_MUTEX_SYSTEM_RESOURCES
/*
 * __mutex_resource_return --
 *	Return any system-allocated mutex resources to the system.
 *
 *	Walks every slot of the mutex array in the region described by infop
 *	and calls __mutex_destroy on each one flagged DB_MUTEX_ALLOCATED.
 *	env->mutex_handle is temporarily pointed at a stack-local manager
 *	built from infop and restored before returning.  Errors from
 *	__mutex_destroy are ignored.
 *
 * PUBLIC: void __mutex_resource_return __P((ENV *, REGINFO *));
 */
void
__mutex_resource_return(env, infop)
	ENV *env;
	REGINFO *infop;
{
	DB_MUTEX *mutexp;
	DB_MUTEXMGR *mtxmgr, mtxmgr_st;
	DB_MUTEXREGION *mtxregion;
	db_mutex_t i;
	void *orig_handle;

	/*
	 * This routine is called in two cases: one, when discarding the
	 * regions from a previous Berkeley DB run, during recovery, and two,
	 * when discarding regions as we shut down the database environment.
	 *
	 * Walk the list of mutexes and destroy any live ones.
	 *
	 * This is just like joining a region -- the REGINFO we're handed is
	 * the same as the one returned by __env_region_attach(), all we have
	 * to do is fill in the links.
	 *
	 * !!!
	 * The region may be corrupted, of course.  We're safe because the
	 * only things we look at are things that are initialized when the
	 * region is created, and never modified after that.
	 */
	memset(&mtxmgr_st, 0, sizeof(mtxmgr_st));
	mtxmgr = &mtxmgr_st;
	mtxmgr->reginfo = *infop;
	mtxregion = mtxmgr->reginfo.primary =
	    R_ADDR(&mtxmgr->reginfo, mtxmgr->reginfo.rp->primary);
	mtxmgr->mutex_array = R_ADDR(&mtxmgr->reginfo, mtxregion->mutex_off);

	/*
	 * This is a little strange, but the mutex_handle is what all of the
	 * underlying mutex routines will use to determine if they should do
	 * any work and to find their information.  Save/restore the handle
	 * around the work loop.
	 *
	 * The OOB mutex (MUTEX_INVALID) is 0, skip it.
	 *
	 * Each slot's address is recomputed from its index; slots are
	 * mutex_size bytes apart, so the pointer is never stepped directly.
	 */
	orig_handle = env->mutex_handle;
	env->mutex_handle = mtxmgr;
	for (i = 1; i <= mtxregion->stat.st_mutex_cnt; ++i) {
		mutexp = MUTEXP_SET(i);
		if (F_ISSET(mutexp, DB_MUTEX_ALLOCATED))
			(void)__mutex_destroy(env, i);
	}
	env->mutex_handle = orig_handle;
}
#endif
381