1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/* #pragma ident	"@(#)lockstat.c	1.12	08/01/16 SMI" */
27
28
29#ifdef KERNEL
30#ifndef _KERNEL
31#define _KERNEL /* Solaris vs. Darwin */
32#endif
33#endif
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/errno.h>
38#include <sys/stat.h>
39#include <sys/ioctl.h>
40#include <sys/conf.h>
41#include <sys/fcntl.h>
42#include <miscfs/devfs/devfs.h>
43
44#include <sys/dtrace.h>
45#include <sys/dtrace_impl.h>
46
47#include <sys/dtrace_glue.h>
48
49#include <sys/lockstat.h>
50
51#include <kern/processor.h>
52
53#define membar_producer dtrace_membar_producer
54
55/*
56 * Hot patch values, x86
57 */
58#if defined(__x86_64__)
59#define	NOP	0x90
60#define	RET	0xc3
61#define LOCKSTAT_AFRAMES 1
62#else
63#error "not ported to this architecture"
64#endif
65
/*
 * One lockstat probe: the instrumented function name, the probe name,
 * the LS_* constant it maps to, and the dtrace probe id assigned when
 * the probe is created (DTRACE_IDNONE until then).
 */
typedef struct lockstat_probe {
	const char	*lsp_func;	/* name of the function the probe fires in */
	const char	*lsp_name;	/* probe name (acquire/release/spin/block/...) */
	int		lsp_probe;	/* LS_* constant; index into lockstat_probemap */
	dtrace_id_t	lsp_id;		/* dtrace id, set by lockstat_provide() */
} lockstat_probe_t;
72
/*
 * Table of every lockstat probe this provider offers, terminated by a
 * NULL entry.  lockstat_provide() walks this table and creates a dtrace
 * probe for each row.  Only probes actually implemented on the current
 * architecture are listed; the LATER section is aspirational.
 */
lockstat_probe_t lockstat_probes[] =
{
#if defined(__x86_64__)
	/* Only provide implemented probes for each architecture  */
	{ LS_LCK_MTX_LOCK,	LSA_ACQUIRE,	LS_LCK_MTX_LOCK_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_MTX_LOCK,	LSA_SPIN,	LS_LCK_MTX_LOCK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_MTX_LOCK,	LSA_BLOCK,	LS_LCK_MTX_LOCK_BLOCK, DTRACE_IDNONE },
	{ LS_LCK_MTX_TRY_LOCK,	LSA_ACQUIRE,	LS_LCK_MTX_TRY_LOCK_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_MTX_TRY_SPIN_LOCK, LSA_ACQUIRE, LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_MTX_UNLOCK,	LSA_RELEASE,	LS_LCK_MTX_UNLOCK_RELEASE, DTRACE_IDNONE },
	{ LS_LCK_MTX_EXT_LOCK,	LSA_ACQUIRE,	LS_LCK_MTX_EXT_LOCK_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_MTX_EXT_LOCK,	LSA_SPIN,	LS_LCK_MTX_EXT_LOCK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_MTX_EXT_LOCK,	LSA_BLOCK,	LS_LCK_MTX_EXT_LOCK_BLOCK, DTRACE_IDNONE },
//	{ LS_LCK_MTX_EXT_TRY_LOCK, LSA_ACQUIRE,	LS_LCK_MTX_TRY_EXT_LOCK_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_MTX_EXT_UNLOCK,   LSA_RELEASE,	LS_LCK_MTX_EXT_UNLOCK_RELEASE, DTRACE_IDNONE },
	{ LS_LCK_MTX_LOCK_SPIN_LOCK,	LSA_ACQUIRE,	LS_LCK_MTX_LOCK_SPIN_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_SHARED,	LSR_ACQUIRE,	LS_LCK_RW_LOCK_SHARED_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_SHARED,	LSR_BLOCK,	LS_LCK_RW_LOCK_SHARED_BLOCK, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_SHARED,	LSR_SPIN,	LS_LCK_RW_LOCK_SHARED_SPIN, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_EXCL,		LSR_ACQUIRE,	LS_LCK_RW_LOCK_EXCL_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_EXCL,		LSR_BLOCK,	LS_LCK_RW_LOCK_EXCL_BLOCK, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_EXCL,		LSR_SPIN,	LS_LCK_RW_LOCK_EXCL_SPIN, DTRACE_IDNONE },
	{ LS_LCK_RW_DONE,		LSR_RELEASE,	LS_LCK_RW_DONE_RELEASE, DTRACE_IDNONE },
	{ LS_LCK_RW_TRY_LOCK_SHARED,	LSR_ACQUIRE,	LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_RW_TRY_LOCK_EXCL,	LSR_ACQUIRE,	LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_SHARED_TO_EXCL, LSR_UPGRADE,	LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_SHARED_TO_EXCL,	LSR_SPIN,	LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_SHARED_TO_EXCL,	LSR_BLOCK,	LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, DTRACE_IDNONE },
	{ LS_LCK_RW_LOCK_EXCL_TO_SHARED,	LSR_DOWNGRADE,	LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, DTRACE_IDNONE },
#endif
#ifdef	LATER
	/* Interlock and spinlock measurements would be nice, but later */
	{ LS_LCK_SPIN_LOCK,	LSS_ACQUIRE,	LS_LCK_SPIN_LOCK_ACQUIRE, DTRACE_IDNONE },
	{ LS_LCK_SPIN_LOCK,	LSS_SPIN,	LS_LCK_SPIN_LOCK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_SPIN_UNLOCK,	LSS_RELEASE,	LS_LCK_SPIN_UNLOCK_RELEASE, DTRACE_IDNONE },

	{ LS_LCK_RW_LOCK_EXCL_TO_SHARED,	LSA_ILK_SPIN,	LS_LCK_RW_LOCK_EXCL_TO_SHARED_ILK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_MTX_LOCK,	LSA_ILK_SPIN,	LS_LCK_MTX_LOCK_ILK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_MTX_EXT_LOCK,	LSA_ILK_SPIN,	LS_LCK_MTX_EXT_LOCK_ILK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_RW_TRY_LOCK_EXCL,	LSA_ILK_SPIN,	LS_LCK_RW_TRY_LOCK_EXCL_ILK_SPIN, DTRACE_IDNONE },
	{ LS_LCK_RW_TRY_LOCK_SHARED,	LSA_SPIN,	LS_LCK_RW_TRY_LOCK_SHARED_SPIN, DTRACE_IDNONE },
#endif

	{ NULL, NULL, 0, 0 }	/* sentinel: terminates table walks */
};
118
/* Maps each LS_* probe constant to its enabled dtrace id (0 = probe disabled). */
dtrace_id_t lockstat_probemap[LS_NPROBES];
120
#if CONFIG_DTRACE
#if defined(__x86_64__)
/*
 * Patch-point labels exported by the x86_64 locking primitives.  Each
 * marks the instruction that lockstat_hot_patch() rewrites between a
 * RET (probe off) and a NOP (probe on).
 */
extern void lck_mtx_lock_lockstat_patch_point(void);
extern void lck_mtx_try_lock_lockstat_patch_point(void);
extern void lck_mtx_try_lock_spin_lockstat_patch_point(void);
extern void lck_mtx_unlock_lockstat_patch_point(void);
extern void lck_mtx_lock_ext_lockstat_patch_point(void);
extern void lck_mtx_ext_unlock_lockstat_patch_point(void);
extern void lck_rw_lock_shared_lockstat_patch_point(void);
extern void lck_rw_lock_exclusive_lockstat_patch_point(void);
extern void lck_rw_lock_shared_to_exclusive_lockstat_patch_point(void);
extern void lck_rw_try_lock_shared_lockstat_patch_point(void);
extern void lck_rw_try_lock_exclusive_lockstat_patch_point(void);
extern void lck_mtx_lock_spin_lockstat_patch_point(void);
#endif

#endif /* CONFIG_DTRACE */
138
/*
 * Binds an LS_* probe constant to the kernel-text address of its
 * assembly hot-patch point (the byte toggled between NOP and RET).
 */
typedef struct lockstat_assembly_probe {
	int lsap_probe;			/* LS_* probe constant */
	vm_offset_t * lsap_patch_point;	/* address of the patchable instruction */
} lockstat_assembly_probe_t;
143
/*
 * Patch-point table, terminated by the LS_LCK_INVALID entry.  A probe
 * may appear more than once if it has multiple patch points, so
 * lockstat_hot_patch() always scans the whole table.
 */
	lockstat_assembly_probe_t assembly_probes[] =
	{
#if CONFIG_DTRACE
#if defined(__x86_64__)
		/*
		 * On x86 these points are better done via hot patches, which ensure
		 * there is zero overhead when not in use.  On x86 these patch points
		 * are swapped between the return instruction and a no-op, with the
		 * Dtrace call following the return.
		 */
		{ LS_LCK_MTX_LOCK_ACQUIRE,		(vm_offset_t *) lck_mtx_lock_lockstat_patch_point },
		{ LS_LCK_MTX_TRY_LOCK_ACQUIRE,		(vm_offset_t *) lck_mtx_try_lock_lockstat_patch_point },
		{ LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE,	(vm_offset_t *) lck_mtx_try_lock_spin_lockstat_patch_point },
		{ LS_LCK_MTX_UNLOCK_RELEASE,		(vm_offset_t *) lck_mtx_unlock_lockstat_patch_point },
		{ LS_LCK_MTX_EXT_LOCK_ACQUIRE,		(vm_offset_t *) lck_mtx_lock_ext_lockstat_patch_point },
		{ LS_LCK_MTX_EXT_UNLOCK_RELEASE,	(vm_offset_t *) lck_mtx_ext_unlock_lockstat_patch_point },
		{ LS_LCK_RW_LOCK_SHARED_ACQUIRE,	(vm_offset_t *) lck_rw_lock_shared_lockstat_patch_point },
		{ LS_LCK_RW_LOCK_EXCL_ACQUIRE,		(vm_offset_t *) lck_rw_lock_exclusive_lockstat_patch_point },
		{ LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE,(vm_offset_t *) lck_rw_lock_shared_to_exclusive_lockstat_patch_point },
		{ LS_LCK_RW_TRY_LOCK_SHARED_ACQUIRE,	(vm_offset_t *) lck_rw_try_lock_shared_lockstat_patch_point },
		{ LS_LCK_RW_TRY_LOCK_EXCL_ACQUIRE,	(vm_offset_t *) lck_rw_try_lock_exclusive_lockstat_patch_point },
		{ LS_LCK_MTX_LOCK_SPIN_ACQUIRE,		(vm_offset_t *) lck_mtx_lock_spin_lockstat_patch_point },
#endif
#endif /* CONFIG_DTRACE */
		{ LS_LCK_INVALID, NULL }	/* sentinel */
};
171
172
173/*
174 * Hot patch switches back and forth the probe points between NOP and RET.
175 * The active argument indicates whether the probe point will turn on or off.
176 *	on == plant a NOP and thus fall through to the probe call
177 *     off == plant a RET and thus avoid the probe call completely
178 * The lsap_probe identifies which probe we will patch.
179 */
180#if defined(__APPLE__)
181static
182void lockstat_hot_patch(boolean_t active, int ls_probe)
183{
184#pragma unused(active)
185	int i;
186
187	/*
188	 * Loop through entire table, in case there are
189	 * multiple patch points per probe.
190	 */
191	for (i = 0; assembly_probes[i].lsap_patch_point; i++) {
192		if (ls_probe == assembly_probes[i].lsap_probe)
193#if defined(__x86_64__)
194		{
195			uint8_t instr;
196			instr = (active ? NOP : RET );
197			(void) ml_nofault_copy( (vm_offset_t)&instr, *(assembly_probes[i].lsap_patch_point),
198								sizeof(instr));
199		}
200#endif
201	} /* for */
202}
203#endif /* __APPLE__*/
204
205
/*
 * Indirect pointer to the probe-firing entry point; set to dtrace_probe
 * in lockstat_attach() once the provider is registered.
 */
void (*lockstat_probe)(dtrace_id_t, uint64_t, uint64_t,
				    uint64_t, uint64_t, uint64_t);
208
#if defined(__APPLE__)
/*
 * Wrapper invoked from assembler hot-patched probe sites (historically
 * the arm ones): looks up the dtrace id for the given probe and, if it
 * is enabled, fires it with the lock address and read/write flag.
 */
void
lockstat_probe_wrapper(int probe, uintptr_t lp, int rwflag)
{
	dtrace_id_t id = lockstat_probemap[probe];

	if (id != 0) {
		(*lockstat_probe)(id, (uintptr_t)lp, (uint64_t)rwflag, 0, 0, 0);
	}
}
#endif /* __APPLE__ */
222
223
static dev_info_t	*lockstat_devi;	/* saved in xxattach() for xxinfo() */
static dtrace_provider_id_t lockstat_id;	/* provider id from dtrace_register() */
226
227/*ARGSUSED*/
228static int
229lockstat_enable(void *arg, dtrace_id_t id, void *parg)
230{
231#pragma unused(arg) /* __APPLE__ */
232
233	lockstat_probe_t *probe = parg;
234
235	ASSERT(!lockstat_probemap[probe->lsp_probe]);
236
237	lockstat_probemap[probe->lsp_probe] = id;
238	membar_producer();
239
240	lockstat_hot_patch(TRUE, probe->lsp_probe);
241	membar_producer();
242	return(0);
243
244}
245
246/*ARGSUSED*/
247static void
248lockstat_disable(void *arg, dtrace_id_t id, void *parg)
249{
250#pragma unused(arg, id) /* __APPLE__ */
251
252	lockstat_probe_t *probe = parg;
253	int i;
254
255	ASSERT(lockstat_probemap[probe->lsp_probe]);
256
257	lockstat_probemap[probe->lsp_probe] = 0;
258	lockstat_hot_patch(FALSE, probe->lsp_probe);
259	membar_producer();
260
261	/*
262	 * See if we have any probes left enabled.
263	 */
264	for (i = 0; i < LS_NPROBES; i++) {
265		if (lockstat_probemap[i]) {
266			/*
267			 * This probe is still enabled.  We don't need to deal
268			 * with waiting for all threads to be out of the
269			 * lockstat critical sections; just return.
270			 */
271			return;
272		}
273	}
274
275}
276
277/*ARGSUSED*/
278static void
279lockstat_provide(void *arg, const dtrace_probedesc_t *desc)
280{
281#pragma unused(arg, desc) /* __APPLE__ */
282
283	int i = 0;
284
285	for (i = 0; lockstat_probes[i].lsp_func != NULL; i++) {
286		lockstat_probe_t *probe = &lockstat_probes[i];
287
288		if (dtrace_probe_lookup(lockstat_id, "mach_kernel",
289		    probe->lsp_func, probe->lsp_name) != 0)
290			continue;
291
292		ASSERT(!probe->lsp_id);
293		probe->lsp_id = dtrace_probe_create(lockstat_id,
294		    "mach_kernel", probe->lsp_func, probe->lsp_name,
295		    LOCKSTAT_AFRAMES, probe);
296	}
297}
298
299
300/*ARGSUSED*/
301static void
302lockstat_destroy(void *arg, dtrace_id_t id, void *parg)
303{
304#pragma unused(arg, id) /* __APPLE__ */
305
306	lockstat_probe_t *probe = parg;
307
308	ASSERT(!lockstat_probemap[probe->lsp_probe]);
309	probe->lsp_id = 0;
310}
311
/* Stability attributes reported for the lockstat provider. */
static dtrace_pattr_t lockstat_attr = {
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
};
319
/*
 * Provider operations vector: only provide, enable, disable, and
 * destroy are implemented; the remaining callbacks are unused (NULL).
 */
static dtrace_pops_t lockstat_pops = {
	lockstat_provide,
	NULL,
	lockstat_enable,
	lockstat_disable,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	lockstat_destroy
};
332
333static int
334lockstat_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
335{
336	switch (cmd) {
337	case DDI_ATTACH:
338		break;
339	case DDI_RESUME:
340		return (DDI_SUCCESS);
341	default:
342		return (DDI_FAILURE);
343	}
344
345	if (ddi_create_minor_node(devi, "lockstat", S_IFCHR, 0,
346	    DDI_PSEUDO, 0) == DDI_FAILURE ||
347	    dtrace_register("lockstat", &lockstat_attr, DTRACE_PRIV_KERNEL,
348	    NULL, &lockstat_pops, NULL, &lockstat_id) != 0) {
349		ddi_remove_minor_node(devi, NULL);
350		return (DDI_FAILURE);
351	}
352
353	lockstat_probe = dtrace_probe;
354	membar_producer();
355
356	ddi_report_dev(devi);
357	lockstat_devi = devi;
358	return (DDI_SUCCESS);
359}
360
/* Character-device open entry point for /dev/lockstat. */
d_open_t _lockstat_open;

int _lockstat_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	/* Always succeeds: there is no per-open state to set up. */
	return 0;
}
368
369#define LOCKSTAT_MAJOR  -24 /* let the kernel pick the device number */
370
371/*
372 * A struct describing which functions will get invoked for certain
373 * actions.
374 */
375static struct cdevsw lockstat_cdevsw =
376{
377	_lockstat_open,		/* open */
378	eno_opcl,			/* close */
379	eno_rdwrt,			/* read */
380	eno_rdwrt,			/* write */
381	eno_ioctl,			/* ioctl */
382	(stop_fcn_t *)nulldev, /* stop */
383	(reset_fcn_t *)nulldev, /* reset */
384	NULL,				/* tty's */
385	eno_select,			/* select */
386	eno_mmap,			/* mmap */
387	eno_strat,			/* strategy */
388	eno_getc,			/* getc */
389	eno_putc,			/* putc */
390	0					/* type */
391};
392
/* Non-zero once lockstat_init() has run its once-only setup; guards
 * against (and panics on) a second initialization. */
static int gLockstatInited = 0;

void lockstat_init( void );
396
397void lockstat_init( void )
398{
399	if (0 == gLockstatInited)
400	{
401		int majdevno = cdevsw_add(LOCKSTAT_MAJOR, &lockstat_cdevsw);
402
403		if (majdevno < 0) {
404			printf("lockstat_init: failed to allocate a major number!\n");
405			gLockstatInited = 0;
406			return;
407		}
408
409		lockstat_attach( (dev_info_t	*)(uintptr_t)majdevno, DDI_ATTACH );
410		gLockstatInited = 1;
411	} else
412		panic("lockstat_init: called twice!\n");
413}
414#undef LOCKSTAT_MAJOR
415