/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
 *
 * $Id$
 */

#include "db_config.h"

#include "db_int.h"

static inline int __db_tas_mutex_lock_int __P((ENV *, db_mutex_t, int));
static inline int __db_tas_mutex_readlock_int __P((ENV *, db_mutex_t, int));

/*
 * __db_tas_mutex_init --
 *	Initialize a test-and-set mutex.
 *
 * PUBLIC: int __db_tas_mutex_init __P((ENV *, db_mutex_t, u_int32_t));
 */
int
__db_tas_mutex_init(env, mutex, flags)
	ENV *env;
	db_mutex_t mutex;
	u_int32_t flags;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	DB_MUTEXMGR *mtxmgr;
	int ret;

#ifndef HAVE_MUTEX_HYBRID
	COMPQUIET(flags, 0);
#endif

	dbenv = env->dbenv;
	mtxmgr = env->mutex_handle;
	mutexp = MUTEXP_SET(mtxmgr, mutex);

	/* Check alignment. */
	if (((uintptr_t)mutexp & (dbenv->mutex_align - 1)) != 0) {
		__db_errx(env, "TAS: mutex not appropriately aligned");
		return (EINVAL);
	}
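	/*
	 * (mutex_align is required to be a power of two, so the AND with
	 * mutex_align - 1 extracts the pointer's offset within an aligned
	 * block.  For example, with an alignment of 16, a pointer value
	 * of 0x1008 yields 0x1008 & 0xf == 0x8, which is nonzero and
	 * therefore misaligned.)
	 */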

#ifdef HAVE_SHARED_LATCHES
	if (F_ISSET(mutexp, DB_MUTEX_SHARED))
		atomic_init(&mutexp->sharecount, 0);
	else
#endif
	if (MUTEX_INIT(&mutexp->tas)) {
		ret = __os_get_syserr();
		__db_syserr(env, ret, "TAS: mutex initialize");
		return (__os_posix_err(ret));
	}
#ifdef HAVE_MUTEX_HYBRID
	if ((ret = __db_pthread_mutex_init(env,
	     mutex, flags | DB_MUTEX_SELF_BLOCK)) != 0)
		return (ret);
#endif
	return (0);
}

/*
 * __db_tas_mutex_lock_int --
 *	Internal function to lock a mutex, or just try to lock it without
 *	waiting.
 */
static inline int
__db_tas_mutex_lock_int(env, mutex, nowait)
	ENV *env;
	db_mutex_t mutex;
	int nowait;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	DB_MUTEXMGR *mtxmgr;
	DB_MUTEXREGION *mtxregion;
	DB_THREAD_INFO *ip;
	u_int32_t nspins;
	int ret;
#ifndef HAVE_MUTEX_HYBRID
	u_long ms, max_ms;
#endif

	dbenv = env->dbenv;

	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
		return (0);

	mtxmgr = env->mutex_handle;
	mtxregion = mtxmgr->reginfo.primary;
	mutexp = MUTEXP_SET(mtxmgr, mutex);

	CHECK_MTX_THREAD(env, mutexp);

#ifdef HAVE_STATISTICS
	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
		++mutexp->mutex_set_wait;
	else
		++mutexp->mutex_set_nowait;
#endif

#ifndef HAVE_MUTEX_HYBRID
	/*
	 * Wait 1ms initially, up to 10ms for mutexes backing logical database
	 * locks, and up to 25ms for mutual exclusion data structure mutexes.
	 * SR: #7675
	 */
	ms = 1;
	max_ms = F_ISSET(mutexp, DB_MUTEX_LOGICAL_LOCK) ? 10 : 25;
#endif
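	/*
	 * (The backoff sleep below the spin loop doubles on each pass,
	 * 1, 2, 4, 8ms and so on, until it reaches max_ms; after that,
	 * each pass sleeps for max_ms.)
	 */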

	/*
	 * Only check the thread state once, by initializing the thread
	 * control block pointer to null.  If this thread is not the
	 * failchk thread, ip keeps its valid value for subsequent passes
	 * through the loop.
	 */
	ip = NULL;

loop:	/* Attempt to acquire the resource for N spins. */
	for (nspins =
	    mtxregion->stat.st_mutex_tas_spins; nspins > 0; --nspins) {
#ifdef HAVE_MUTEX_S390_CC_ASSEMBLY
		tsl_t zero;

		zero = 0;
#endif

		dbenv = env->dbenv;

#ifdef HAVE_MUTEX_HPPA_MSEM_INIT
	relock:
#endif
		/*
		 * Test-and-test-and-set: avoid interlocked instructions
		 * until they're likely to succeed by first checking with
		 * a plain read whether the mutex is held.
		 */
		if (MUTEXP_IS_BUSY(mutexp) || !MUTEXP_ACQUIRE(mutexp)) {
			if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
			    ip == NULL && dbenv->is_alive(dbenv,
			    mutexp->pid, mutexp->tid, 0) == 0) {
				ret = __env_set_state(env, &ip, THREAD_VERIFY);
				if (ret != 0 ||
				    ip->dbth_state == THREAD_FAILCHK)
					return (DB_RUNRECOVERY);
			}
			if (nowait)
				return (DB_LOCK_NOTGRANTED);
			/*
			 * Some systems (notably those with newer Intel CPUs)
			 * need a small pause here. [#6975]
			 */
			MUTEX_PAUSE
			continue;
		}

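		/*
		 * Acquire barrier: the test-and-set must be globally
		 * visible before the critical section's loads and stores
		 * are performed.
		 */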
		MEMBAR_ENTER();

#ifdef HAVE_MUTEX_HPPA_MSEM_INIT
		/*
		 * HP semaphores are unlocked automatically when a holding
		 * process exits.  If the mutex appears to be locked
		 * (F_ISSET(DB_MUTEX_LOCKED)) but we got here, assume this
		 * has happened.  Set the pid and tid into the mutex and
		 * lock again.  (The default state of the mutexes used to
		 * block in __lock_get_internal is locked, so exiting with
		 * a locked mutex is reasonable behavior for a process that
		 * happened to initialize or use one of them.)
		 */
		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
			dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);
			goto relock;
		}
		/*
		 * If we make it here, the mutex isn't locked, the diagnostic
		 * won't fire, and we were really unlocked by someone calling
		 * the DB mutex unlock function.
		 */
#endif
#ifdef DIAGNOSTIC
		if (F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
			char buf[DB_THREADID_STRLEN];
			__db_errx(env,
			    "TAS lock failed: lock %d currently in use: ID: %s",
			    mutex, dbenv->thread_id_string(dbenv,
			    mutexp->pid, mutexp->tid, buf));
			return (__env_panic(env, EACCES));
		}
#endif
		F_SET(mutexp, DB_MUTEX_LOCKED);
		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);

#ifdef DIAGNOSTIC
		/*
		 * We want to switch threads as often as possible.  Yield
		 * every time we get a mutex to ensure contention.
		 */
		if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
			__os_yield(env, 0, 0);
#endif
		return (0);
	}

	/* Wait for the lock to become available. */
#ifdef HAVE_MUTEX_HYBRID
	/*
	 * By yielding here we can get the other thread to give up the
	 * mutex before calling the more expensive library mutex call.
	 * Tests have shown this to be a big win when there is contention.
	 * With shared latches check the locked bit only after checking
	 * that no one has the latch in shared mode.
	 */
	__os_yield(env, 0, 0);
	if (!MUTEXP_IS_BUSY(mutexp))
		goto loop;
	if ((ret = __db_pthread_mutex_lock(env, mutex)) != 0)
		return (ret);
#else
	__os_yield(env, 0, ms * US_PER_MS);
	if ((ms <<= 1) > max_ms)
		ms = max_ms;
#endif

	/*
	 * We're spinning.  The environment might be hung, and somebody else
	 * has already recovered it.  The first thing recovery does is panic
	 * the environment.  Check to see if we're never going to get this
	 * mutex.
	 */
	PANIC_CHECK(env);

	goto loop;
}

/*
 * __db_tas_mutex_lock
 *	Lock on a mutex, blocking if necessary.
 *
 * PUBLIC: int __db_tas_mutex_lock __P((ENV *, db_mutex_t));
 */
int
__db_tas_mutex_lock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	return (__db_tas_mutex_lock_int(env, mutex, 0));
}

/*
 * __db_tas_mutex_trylock
 *	Try to exclusively lock a mutex without ever blocking - ever!
 *
 *	Returns 0 on success,
 *		DB_LOCK_NOTGRANTED if the mutex was busy, and
 *		possibly DB_RUNRECOVERY during DB_ENV_FAILCHK or panic.
 *
 *	This will work for DB_MUTEX_SHARED latches, though it always tries
 *	for exclusive access.
 *
 * PUBLIC: int __db_tas_mutex_trylock __P((ENV *, db_mutex_t));
 */
int
__db_tas_mutex_trylock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	return (__db_tas_mutex_lock_int(env, mutex, 1));
}
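
/*
 * Illustrative sketch (not code in this module): a nowait caller is
 * expected to handle all three classes of return value from
 * __db_tas_mutex_trylock():
 *
 *	if ((ret = __db_tas_mutex_trylock(env, mutex)) == 0) {
 *		...critical section...
 *		ret = __db_tas_mutex_unlock(env, mutex);
 *	} else if (ret == DB_LOCK_NOTGRANTED) {
 *		...busy: back off or do other work, then retry...
 *	} else {
 *		...e.g. DB_RUNRECOVERY: propagate the failure...
 *	}
 */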

#if defined(HAVE_SHARED_LATCHES)
/*
 * __db_tas_mutex_readlock_int --
 *	Internal function to get a shared lock on a latch, blocking
 *	if necessary.
 */
static inline int
__db_tas_mutex_readlock_int(env, mutex, nowait)
	ENV *env;
	db_mutex_t mutex;
	int nowait;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	DB_MUTEXMGR *mtxmgr;
	DB_MUTEXREGION *mtxregion;
	DB_THREAD_INFO *ip;
	int lock;
	u_int32_t nspins;
	int ret;
#ifndef HAVE_MUTEX_HYBRID
	u_long ms, max_ms;
#endif

	dbenv = env->dbenv;

	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
		return (0);

	mtxmgr = env->mutex_handle;
	mtxregion = mtxmgr->reginfo.primary;
	mutexp = MUTEXP_SET(mtxmgr, mutex);

	CHECK_MTX_THREAD(env, mutexp);

	DB_ASSERT(env, F_ISSET(mutexp, DB_MUTEX_SHARED));
#ifdef HAVE_STATISTICS
	if (F_ISSET(mutexp, DB_MUTEX_LOCKED))
		++mutexp->mutex_set_rd_wait;
	else
		++mutexp->mutex_set_rd_nowait;
#endif

#ifndef HAVE_MUTEX_HYBRID
	/*
	 * Wait 1ms initially, up to 10ms for mutexes backing logical database
	 * locks, and up to 25ms for mutual exclusion data structure mutexes.
	 * SR: #7675
	 */
	ms = 1;
	max_ms = F_ISSET(mutexp, DB_MUTEX_LOGICAL_LOCK) ? 10 : 25;
#endif
	/*
	 * Only check the thread state once, by initializing the thread
	 * control block pointer to null.  If this thread is not the
	 * failchk thread, ip keeps its valid value for subsequent passes
	 * through the loop.
	 */
	ip = NULL;

loop:	/* Attempt to acquire the resource for N spins. */
	for (nspins =
	    mtxregion->stat.st_mutex_tas_spins; nspins > 0; --nspins) {
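		/*
		 * Read the current share count, then try to increment it
		 * with a compare-and-swap.  MUTEX_SHARE_ISEXCLUSIVE means
		 * a writer holds the latch; a failed CAS means the count
		 * changed underneath us.  Either way, fall into the retry
		 * path below.
		 */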
		lock = atomic_read(&mutexp->sharecount);
		if (lock == MUTEX_SHARE_ISEXCLUSIVE ||
		    !atomic_compare_exchange(env,
			&mutexp->sharecount, lock, lock + 1)) {
			if (F_ISSET(dbenv, DB_ENV_FAILCHK) &&
			    ip == NULL && dbenv->is_alive(dbenv,
			    mutexp->pid, mutexp->tid, 0) == 0) {
				ret = __env_set_state(env, &ip, THREAD_VERIFY);
				if (ret != 0 ||
				    ip->dbth_state == THREAD_FAILCHK)
					return (DB_RUNRECOVERY);
			}
			if (nowait)
				return (DB_LOCK_NOTGRANTED);
			/*
			 * Some systems (notably those with newer Intel CPUs)
			 * need a small pause here. [#6975]
			 */
			MUTEX_PAUSE
			continue;
		}

		MEMBAR_ENTER();
		/*
		 * For shared latches the thread id is the last requestor's
		 * id.
		 */
		dbenv->thread_id(dbenv, &mutexp->pid, &mutexp->tid);

		return (0);
	}

	/* Wait for the lock to become available. */
#ifdef HAVE_MUTEX_HYBRID
	/*
	 * By yielding here we can get the other thread to give up the
	 * mutex before calling the more expensive library mutex call.
	 * Tests have shown this to be a big win when there is contention.
	 */
	__os_yield(env, 0, 0);
	if (atomic_read(&mutexp->sharecount) != MUTEX_SHARE_ISEXCLUSIVE)
		goto loop;
	if ((ret = __db_pthread_mutex_lock(env, mutex)) != 0)
		return (ret);
#else
	__os_yield(env, 0, ms * US_PER_MS);
	if ((ms <<= 1) > max_ms)
		ms = max_ms;
#endif

	/*
	 * We're spinning.  The environment might be hung, and somebody else
	 * has already recovered it.  The first thing recovery does is panic
	 * the environment.  Check to see if we're never going to get this
	 * mutex.
	 */
	PANIC_CHECK(env);

	goto loop;
}

/*
 * __db_tas_mutex_readlock
 *	Get a shared lock on a latch, waiting if necessary.
 *
 * PUBLIC: #if defined(HAVE_SHARED_LATCHES)
 * PUBLIC: int __db_tas_mutex_readlock __P((ENV *, db_mutex_t));
 * PUBLIC: #endif
 */
int
__db_tas_mutex_readlock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	return (__db_tas_mutex_readlock_int(env, mutex, 0));
}

/*
 * __db_tas_mutex_tryreadlock
 *	Try to get a shared lock on a latch; don't wait when busy.
 *
 * PUBLIC: #if defined(HAVE_SHARED_LATCHES)
 * PUBLIC: int __db_tas_mutex_tryreadlock __P((ENV *, db_mutex_t));
 * PUBLIC: #endif
 */
int
__db_tas_mutex_tryreadlock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	return (__db_tas_mutex_readlock_int(env, mutex, 1));
}
#endif

/*
 * __db_tas_mutex_unlock --
 *	Release a mutex.
 *
 * PUBLIC: int __db_tas_mutex_unlock __P((ENV *, db_mutex_t));
 *
 * Hybrid shared latch wakeup
 *	When an exclusive requester waits for the last shared holder to
 *	release, it increments mutexp->wait and pthread_cond_wait()'s. The
 *	last shared unlock calls __db_pthread_mutex_unlock() to wake it.
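 *
 *	A rough sketch of the handoff (assuming HAVE_MUTEX_HYBRID):
 *
 *	    last reader                     exclusive waiter
 *	                                    finds the latch busy,
 *	                                    mutexp->wait++, cond_wait()
 *	    sharecount drops to 0
 *	    sees mutexp->wait != 0
 *	    __db_pthread_mutex_unlock()
 *	                                    wakes and acquires the latch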
 */
int
__db_tas_mutex_unlock(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	DB_ENV *dbenv;
	DB_MUTEX *mutexp;
	DB_MUTEXMGR *mtxmgr;
#ifdef HAVE_MUTEX_HYBRID
	int ret;
#ifdef MUTEX_DIAG
	int waiters;
#endif
#endif
#ifdef HAVE_SHARED_LATCHES
	int sharecount;
#endif

	dbenv = env->dbenv;

	if (!MUTEX_ON(env) || F_ISSET(dbenv, DB_ENV_NOLOCKING))
		return (0);

	mtxmgr = env->mutex_handle;
	mutexp = MUTEXP_SET(mtxmgr, mutex);
#if defined(HAVE_MUTEX_HYBRID) && defined(MUTEX_DIAG)
	waiters = mutexp->wait;
#endif

#if defined(DIAGNOSTIC)
#if defined(HAVE_SHARED_LATCHES)
	if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
		if (atomic_read(&mutexp->sharecount) == 0) {
			__db_errx(env, "shared unlock %d already unlocked",
			    mutex);
			return (__env_panic(env, EACCES));
		}
	} else
#endif
	if (!F_ISSET(mutexp, DB_MUTEX_LOCKED)) {
		__db_errx(env, "unlock %d already unlocked", mutex);
		return (__env_panic(env, EACCES));
	}
#endif

#ifdef HAVE_SHARED_LATCHES
	if (F_ISSET(mutexp, DB_MUTEX_SHARED)) {
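		/*
		 * A share count of MUTEX_SHARE_ISEXCLUSIVE means we hold
		 * the latch exclusively; otherwise we are one of possibly
		 * several readers, and only the last one out falls through
		 * to the wakeup code below.
		 */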
		sharecount = atomic_read(&mutexp->sharecount);
		/*MUTEX_MEMBAR(mutexp->sharecount);*/		/* XXX why? */
		if (sharecount == MUTEX_SHARE_ISEXCLUSIVE) {
			F_CLR(mutexp, DB_MUTEX_LOCKED);
			/* Flush flag update before zeroing count */
			MEMBAR_EXIT();
			atomic_init(&mutexp->sharecount, 0);
		} else {
			DB_ASSERT(env, sharecount > 0);
			MEMBAR_EXIT();
			sharecount = atomic_dec(env, &mutexp->sharecount);
			DB_ASSERT(env, sharecount >= 0);
			if (sharecount > 0)
				return (0);
		}
	} else
#endif
	{
		F_CLR(mutexp, DB_MUTEX_LOCKED);
		MUTEX_UNSET(&mutexp->tas);
	}

#ifdef HAVE_MUTEX_HYBRID
#ifdef DIAGNOSTIC
	if (F_ISSET(dbenv, DB_ENV_YIELDCPU))
		__os_yield(env, 0, 0);
#endif

	/* Prevent the load of wait from being hoisted before MUTEX_UNSET */
	MUTEX_MEMBAR(mutexp->flags);
	if (mutexp->wait &&
	    (ret = __db_pthread_mutex_unlock(env, mutex)) != 0)
		return (ret);

#ifdef MUTEX_DIAG
	if (mutexp->wait)
		printf("tas_unlock %d %x waiters! busy %x waiters %d/%d\n",
		    mutex, pthread_self(),
		    MUTEXP_BUSY_FIELD(mutexp), waiters, mutexp->wait);
#endif
#endif

	return (0);
}

/*
 * __db_tas_mutex_destroy --
 *	Destroy a mutex.
 *
 * PUBLIC: int __db_tas_mutex_destroy __P((ENV *, db_mutex_t));
 */
int
__db_tas_mutex_destroy(env, mutex)
	ENV *env;
	db_mutex_t mutex;
{
	DB_MUTEX *mutexp;
	DB_MUTEXMGR *mtxmgr;
#ifdef HAVE_MUTEX_HYBRID
	int ret;
#endif

	if (!MUTEX_ON(env))
		return (0);

	mtxmgr = env->mutex_handle;
	mutexp = MUTEXP_SET(mtxmgr, mutex);

	MUTEX_DESTROY(&mutexp->tas);

#ifdef HAVE_MUTEX_HYBRID
	if ((ret = __db_pthread_mutex_destroy(env, mutex)) != 0)
		return (ret);
#endif

	COMPQUIET(mutexp, NULL);	/* MUTEX_DESTROY may not be defined. */
	return (0);
}