1/*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996,2008 Oracle.  All rights reserved.
5 *
6 * $Id: lock_list.c,v 12.19 2008/01/30 04:30:42 mjc Exp $
7 */
8
9#include "db_config.h"
10
11#include "db_int.h"
12#include "dbinc/lock.h"
13#include "dbinc/log.h"
14
15static int __lock_sort_cmp __P((const void *, const void *));
16
17/*
18 * Lock list routines.
19 *	The list is composed of a 32-bit count of locks followed by
20 * each lock.  A lock is represented by a 16-bit page-count, a lock
21 * object and a page list.  A lock object consists of a 16-bit size
22 * and the object itself.   In a pseudo BNF notation, you get:
23 *
24 * LIST = COUNT32 LOCK*
25 * LOCK = COUNT16 LOCKOBJ PAGELIST
26 * LOCKOBJ = COUNT16 OBJ
27 * PAGELIST = COUNT32*
28 *
29 * (Recall that X* means "0 or more X's")
30 *
31 * In most cases, the OBJ is a struct __db_ilock and the page list is
32 * a series of (32-bit) page numbers that should get written into the
33 * pgno field of the __db_ilock.  So, the actual number of pages locked
34 * is the number of items in the PAGELIST plus 1. If this is an application-
35 * specific lock, then we cannot interpret obj and the pagelist must
36 * be empty.
37 *
38 * Consider a lock list for: File A, pages 1&2, File B pages 3-5, Applock
39 * This would be represented as:
40 *	5 1 [fid=A;page=1] 2 2 [fid=B;page=3] 4 5 0 APPLOCK
41 *        ------------------ -------------------- ---------
42 *         LOCK for file A    LOCK for file B     application-specific lock
43 */
44
45#define	MAX_PGNOS	0xffff
46
47/*
48 * These macros are bigger than one might expect because some compilers say a
49 * cast does not return an lvalue, so constructs like *(u_int32_t*)dp = count;
50 * generate warnings.
51 */
52#define	RET_SIZE(size, count)  ((size) +				\
53     sizeof(u_int32_t) + (count) * 2 * sizeof(u_int16_t))
54
55#define	PUT_COUNT(dp, count)	do {	u_int32_t __c = (count);	\
56					LOGCOPY_32(env, dp, &__c);	\
57					dp = (u_int8_t *)dp +		\
58					     sizeof(u_int32_t);		\
59				} while (0)
60#define	PUT_PCOUNT(dp, count)	do {	u_int16_t __c = (count);	\
61					LOGCOPY_16(env, dp, &__c);	\
62					dp = (u_int8_t *)dp +		\
63					    sizeof(u_int16_t);		\
64				} while (0)
65#define	PUT_SIZE(dp, size)	do {	u_int16_t __s = (size);		\
66					LOGCOPY_16(env, dp, &__s);	\
67					dp = (u_int8_t *)dp +		\
68					    sizeof(u_int16_t);		\
69				} while (0)
70#define	PUT_PGNO(dp, pgno)	do {	db_pgno_t __pg = (pgno);	\
71					LOGCOPY_32(env, dp, &__pg);	\
72					dp = (u_int8_t *)dp +		\
73					    sizeof(db_pgno_t);		\
74				} while (0)
75#define	COPY_OBJ(dp, obj)	do {					\
76					memcpy(dp,			\
77					    (obj)->data, (obj)->size);  \
78					dp = (u_int8_t *)dp +		\
79					     DB_ALIGN((obj)->size,	\
80					     sizeof(u_int32_t));	\
81				} while (0)
82#define	GET_COUNT(dp, count)	do {	LOGCOPY_32(env, &count, dp);	\
83					dp = (u_int8_t *)dp +		\
84					     sizeof(u_int32_t);	\
85				} while (0)
86#define	GET_PCOUNT(dp, count)	do {	LOGCOPY_16(env, &count, dp);	\
87					dp = (u_int8_t *)dp +		\
88					     sizeof(u_int16_t);	\
89				} while (0)
90#define	GET_SIZE(dp, size)	do {	LOGCOPY_16(env, &size, dp);	\
91					dp = (u_int8_t *)dp +		\
92					     sizeof(u_int16_t);	\
93				} while (0)
94#define	GET_PGNO(dp, pgno)	do {	LOGCOPY_32(env, &pgno, dp);	\
95					dp = (u_int8_t *)dp +		\
96					     sizeof(db_pgno_t);	\
97				} while (0)
98
99/*
100 * __lock_fix_list --
101 *
102 * PUBLIC: int __lock_fix_list __P((ENV *, DBT *, u_int32_t));
103 */
104int
105__lock_fix_list(env, list_dbt, nlocks)
106	ENV *env;
107	DBT *list_dbt;
108	u_int32_t nlocks;
109{
110	DBT *obj;
111	DB_LOCK_ILOCK *lock, *plock;
112	u_int32_t i, j, nfid, npgno, size;
113	u_int8_t *data, *dp;
114	int ret;
115
116	if ((size = list_dbt->size) == 0)
117		return (0);
118
119	obj = (DBT *)list_dbt->data;
120
121	/*
122	 * If necessary sort the list of locks so that locks on the same fileid
123	 * are together.  We do not sort 1 or 2 locks because by definition if
124	 * there are locks on the same fileid they will be together.  The sort
125	 * will also move any locks that do not look like page locks to the end
126	 * of the list so we can stop looking for locks we can combine when we
127	 * hit one.
128	 */
129	switch (nlocks) {
130	case 1:
131		size = RET_SIZE(obj->size, 1);
132		if ((ret = __os_malloc(env, size, &data)) != 0)
133			return (ret);
134
135		dp = data;
136		PUT_COUNT(dp, 1);
137		PUT_PCOUNT(dp, 0);
138		PUT_SIZE(dp, obj->size);
139		COPY_OBJ(dp, obj);
140		break;
141	default:
142		/* Sort so that all locks with same fileid are together. */
143		qsort(list_dbt->data, nlocks, sizeof(DBT), __lock_sort_cmp);
144		/* FALLTHROUGH */
145	case 2:
146		nfid = npgno = 0;
147		i = 0;
148		if (obj->size != sizeof(DB_LOCK_ILOCK))
149			goto not_ilock;
150
151		nfid = 1;
152		plock = (DB_LOCK_ILOCK *)obj->data;
153
154		/* We use ulen to keep track of the number of pages. */
155		j = 0;
156		obj[0].ulen = 0;
157		for (i = 1; i < nlocks; i++) {
158			if (obj[i].size != sizeof(DB_LOCK_ILOCK))
159				break;
160			lock = (DB_LOCK_ILOCK *)obj[i].data;
161			if (obj[j].ulen < MAX_PGNOS &&
162			     lock->type == plock->type &&
163			     memcmp(lock->fileid,
164			     plock->fileid, DB_FILE_ID_LEN) == 0) {
165				obj[j].ulen++;
166				npgno++;
167			} else {
168				nfid++;
169				plock = lock;
170				j = i;
171				obj[j].ulen = 0;
172			}
173		}
174
175not_ilock:	size = nfid * sizeof(DB_LOCK_ILOCK);
176		size += npgno * sizeof(db_pgno_t);
177		/* Add the number of nonstandard locks and get their size. */
178		nfid += nlocks - i;
179		for (; i < nlocks; i++) {
180			size += obj[i].size;
181			obj[i].ulen = 0;
182		}
183
184		size = RET_SIZE(size, nfid);
185		if ((ret = __os_malloc(env, size, &data)) != 0)
186			return (ret);
187
188		dp = data;
189		PUT_COUNT(dp, nfid);
190
191		for (i = 0; i < nlocks; i = j) {
192			PUT_PCOUNT(dp, obj[i].ulen);
193			PUT_SIZE(dp, obj[i].size);
194			COPY_OBJ(dp, &obj[i]);
195			lock = (DB_LOCK_ILOCK *)obj[i].data;
196			for (j = i + 1; j <= i + obj[i].ulen; j++) {
197				lock = (DB_LOCK_ILOCK *)obj[j].data;
198				PUT_PGNO(dp, lock->pgno);
199			}
200		}
201	}
202
203	__os_free(env, list_dbt->data);
204
205	list_dbt->data = data;
206	list_dbt->size = size;
207
208	return (0);
209}
210
211/*
212 * PUBLIC: int __lock_get_list __P((ENV *, DB_LOCKER *, u_int32_t,
213 * PUBLIC:	      db_lockmode_t, DBT *));
214 */
215int
216__lock_get_list(env, locker, flags, lock_mode, list)
217	ENV *env;
218	DB_LOCKER *locker;
219	u_int32_t flags;
220	db_lockmode_t lock_mode;
221	DBT *list;
222{
223	DBT obj_dbt;
224	DB_LOCK ret_lock;
225	DB_LOCKREGION *region;
226	DB_LOCKTAB *lt;
227	DB_LOCK_ILOCK *lock;
228	db_pgno_t save_pgno;
229	u_int16_t npgno, size;
230	u_int32_t i, nlocks;
231	int ret;
232	void *data, *dp;
233
234	if (list->size == 0)
235		return (0);
236	ret = 0;
237	data = NULL;
238
239	lt = env->lk_handle;
240	dp = list->data;
241
242	/*
243	 * There is no assurance log records will be aligned.  If not, then
244	 * copy the data to an aligned region so the rest of the code does
245	 * not have to worry about it.
246	 */
247	if ((uintptr_t)dp != DB_ALIGN((uintptr_t)dp, sizeof(u_int32_t))) {
248		if ((ret = __os_malloc(env, list->size, &data)) != 0)
249			return (ret);
250		memcpy(data, list->data, list->size);
251		dp = data;
252	}
253
254	region = lt->reginfo.primary;
255	LOCK_SYSTEM_LOCK(lt, region);
256	GET_COUNT(dp, nlocks);
257
258	for (i = 0; i < nlocks; i++) {
259		GET_PCOUNT(dp, npgno);
260		GET_SIZE(dp, size);
261		lock = (DB_LOCK_ILOCK *) dp;
262		save_pgno = lock->pgno;
263		obj_dbt.data = dp;
264		obj_dbt.size = size;
265		dp = ((u_int8_t *)dp) + DB_ALIGN(size, sizeof(u_int32_t));
266		do {
267			if ((ret = __lock_get_internal(lt, locker,
268			     flags, &obj_dbt, lock_mode, 0, &ret_lock)) != 0) {
269			     lock->pgno = save_pgno;
270			     goto err;
271			}
272			if (npgno != 0)
273				GET_PGNO(dp, lock->pgno);
274		} while (npgno-- != 0);
275		lock->pgno = save_pgno;
276	}
277
278err:	LOCK_SYSTEM_UNLOCK(lt, region);
279	if (data != NULL)
280		__os_free(env, data);
281	return (ret);
282}
283
284#define	UINT32_CMP(A, B)	((A) == (B) ? 0 : ((A) > (B) ? 1 : -1))
285static int
286__lock_sort_cmp(a, b)
287	const void *a, *b;
288{
289	const DBT *d1, *d2;
290	DB_LOCK_ILOCK *l1, *l2;
291
292	d1 = a;
293	d2 = b;
294
295	/* Force all non-standard locks to sort at end. */
296	if (d1->size != sizeof(DB_LOCK_ILOCK)) {
297		if (d2->size != sizeof(DB_LOCK_ILOCK))
298			return (UINT32_CMP(d1->size, d2->size));
299		else
300			return (1);
301	} else if (d2->size != sizeof(DB_LOCK_ILOCK))
302		return (-1);
303
304	l1 = d1->data;
305	l2 = d2->data;
306	if (l1->type != l2->type)
307		return (UINT32_CMP(l1->type, l2->type));
308	return (memcmp(l1->fileid, l2->fileid, DB_FILE_ID_LEN));
309}
310
311/*
312 * PUBLIC: void __lock_list_print __P((ENV *, DBT *));
313 */
314void
315__lock_list_print(env, list)
316	ENV *env;
317	DBT *list;
318{
319	DB_LOCK_ILOCK *lock;
320	db_pgno_t pgno;
321	u_int16_t npgno, size;
322	u_int32_t i, nlocks;
323	u_int8_t *fidp;
324	char *fname, *dname, *p, namebuf[26];
325	void *dp;
326
327	if (list->size == 0)
328		return;
329	dp = list->data;
330
331	GET_COUNT(dp, nlocks);
332
333	for (i = 0; i < nlocks; i++) {
334		GET_PCOUNT(dp, npgno);
335		GET_SIZE(dp, size);
336		lock = (DB_LOCK_ILOCK *) dp;
337		fidp = lock->fileid;
338		(void)__dbreg_get_name(env, fidp, &fname, &dname);
339		printf("\t");
340		if (fname == NULL && dname == NULL)
341			printf("(%lx %lx %lx %lx %lx)",
342			(u_long)fidp[0], (u_long)fidp[1], (u_long)fidp[2],
343			(u_long)fidp[3], (u_long)fidp[4]);
344		else {
345			if (fname != NULL && dname != NULL) {
346				(void)snprintf(namebuf, sizeof(namebuf),
347				    "%14s.%-10s", fname, dname);
348				p = namebuf;
349			} else if (fname != NULL)
350				p = fname;
351			else
352				p = dname;
353			printf("%-25s", p);
354		}
355		dp = ((u_int8_t *)dp) + DB_ALIGN(size, sizeof(u_int32_t));
356		LOGCOPY_32(env, &pgno, &lock->pgno);
357		do {
358			printf(" %d", pgno);
359			if (npgno != 0)
360				GET_PGNO(dp, pgno);
361		} while (npgno-- != 0);
362		printf("\n");
363	}
364}
365