winlockio.c revision 7656:2621e50fdf4a
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


/*
 * This is the lock device driver.
 *
 * The lock driver provides a variation of inter-process mutexes with the
 * following twist in semantics:
 *	After a set timeout, a waiter for a lock can "break" the lock and
 *	grab it from the current owner (without informing the owner).
 *
 * These semantics mean that, temporarily, multiple processes may think they
 * own the lock. This usually does not make sense where locks protect a
 * critical region and access to data structures must be serialized, since
 * breaking the lock also loses the serialization and results in corrupt
 * data structures.
 *
 * The usage of the winlock driver is primarily driven by the graphics system
 * when doing DGA (direct graphics access) graphics. The locks are used to
 * protect access to the frame buffer (which presumably reflects back to the
 * screen) between competing processes that write directly to the screen as
 * opposed to going through the window server etc.
 * In this case, breaking the lock at worst causes the screen image to be
 * distorted, which is easily fixed by doing a "refresh".
 *
 * In well-behaved applications, the lock is held for a very short time and
 * the breaking semantics do not come into play. Without this feature, using
 * normal inter-process mutexes, a misbehaved application could grab the
 * screen writing capability from the window manager and effectively make
 * the system look like it is hung (mouse pointer does not move).
 *
 * A secondary aspect of the winlock driver is that it allows for extremely
 * fast lock acquire/release in cases where there is low contention. A memory
 * write is all that is needed (not even a function call). Usually the window
 * manager is the only DGA writer, and this case is optimized for. Occasionally
 * some processes might do DGA graphics and cause kernel faults to handle
 * the contention/locking (and that has got to be slow!).
 *
 * The following IOCTLs are supported:
 *
 *   GRABPAGEALLOC:
 *	Compatibility with old cgsix device driver lockpage ioctls.
 *	Lockpages created this way must be an entire page for compatibility
 *	with older software.  This ioctl allocates a lock context with its own
 *	private lock page.  The unique "ident" that identifies this lock is
 *	returned.
 *
 *   GRABPAGEFREE:
 *	Compatibility with cgsix device driver lockpage ioctls.  This
 *	ioctl releases the lock context allocated by GRABPAGEALLOC.
 *
 *   GRABLOCKINFO:
 *	Returns a one-word flag.  '1' means that multiple clients may
 *	access this lock page.  Older device drivers returned '0',
 *	meaning that only two clients could access a lock page.
 *
 *   GRABATTACH:
 *	Not supported.  This ioctl would have grabbed all lock pages
 *	on behalf of the calling program.
 *
 *   WINLOCKALLOC:
 *	Allocate a lock context.  This ioctl accepts a key value as
 *	its argument.  If the key is zero, a new lock context is
 *	created, and its "ident" is returned.  If the key is nonzero,
 *	all existing contexts are checked to see if they match the
 *	key.  If a match is found, its reference count is incremented
 *	and its ident is returned; otherwise a new context is created
 *	and its ident is returned.
 *
 *   WINLOCKFREE:
 *	Free a lock context.  This ioctl accepts the ident of a lock
 *	context and decrements its reference count.  Once the reference
 *	count reaches zero *and* all mappings are released, the lock
 *	context is freed.  When all the lock contexts in the lock page are
 *	freed, the lock page is freed as well.
 *
 *   WINLOCKSETTIMEOUT:
 *	Set lock timeout for a context.  This ioctl accepts the ident
 *	of a lock context and a timeout value in milliseconds.
 *	Whenever lock contention occurs, the timer is started and the lock is
 *	broken after the timeout expires. If the timeout value is zero, the
 *	lock does not time out.  This value will be rounded to the nearest
 *	clock tick, so don't try to use it for real-time control or something.
 *
 *   WINLOCKGETTIMEOUT:
 *	Get lock timeout from a context.
 *
 *   WINLOCKDUMP:
 *	Dump state of this device.
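 *
 *   As an illustration, here is a minimal sketch of the alloc/settimeout/
 *   free sequence in hypothetical user-level code (error handling omitted;
 *   the fd and variable names are invented for this example):
 *
 *	int fd = open("/dev/winlock", O_RDWR);
 *	struct winlockalloc wla;
 *	struct winlocktimeout wlt;
 *
 *	wla.sy_key = 0;			(zero key: always create a new lock)
 *	ioctl(fd, WINLOCKALLOC, &wla);	(lock ident comes back in sy_ident)
 *	wlt.sy_ident = wla.sy_ident;
 *	wlt.sy_timeout = 100;		(lock may be broken after 100 ms)
 *	ioctl(fd, WINLOCKSETTIMEOUT, &wlt);
 *	...
 *	ioctl(fd, WINLOCKFREE, &wla.sy_ident);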
 *
 *
 * How /dev/winlock works:
 *
 *   Every lock context consists of two mappings for the client to the lock
 *   page.  These mappings are known as the "lock page" and "unlock page"
 *   to the client. The first mmap to the lock context (identified by the
 *   sy_ident field returned during alloc) allocates a mapping to the lock
 *   page, the second mmap allocates a mapping to the unlock page.
 *	The mappings don't have to be ordered in virtual address space, but do
 *   need to be ordered in time. Mapping and unmapping of these lock and unlock
 *   pages should happen in pairs. Doing them one at a time, or unmapping one
 *   and leaving one mapped etc., causes undefined behavior.
 *	The mappings are always of length PAGESIZE, and type MAP_SHARED.
 *
 *   The first ioctl is to ALLOC a lock, either based on a key (if trying to
 *	grab a preexisting lock) or 0 (gets a default new one).
 *	This ioctl returns a value in sy_ident which is needed to do the
 *	later mmaps and FREE/other ioctls.
 *
 *   The "page number" portion of the sy_ident needs to be passed as the
 *	file offset when doing an mmap for both the lock page and unlock page.
 *
 *   The value returned by mmap (a user virtual address) needs to be
 *	incremented by the "page offset" portion of sy_ident to obtain the
 *	pointer to the actual lock. (Skipping this step does not cause any
 *	visible error, but the process will be using the wrong lock!)
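 *
 *   To make the above concrete, a sketch in hypothetical user-level code
 *   (pagesize from sysconf(_SC_PAGESIZE); fd and wla as in the earlier
 *   example; error handling omitted):
 *
 *	off_t pgnum = wla.sy_ident & ~(pagesize - 1);	(page number bits)
 *	off_t pgoff = wla.sy_ident & (pagesize - 1);	(page offset bits)
 *	volatile int *lockp = (int *)((char *)mmap(NULL, pagesize,
 *	    PROT_READ|PROT_WRITE, MAP_SHARED, fd, pgnum) + pgoff);
 *	volatile int *unlockp = (int *)((char *)mmap(NULL, pagesize,
 *	    PROT_READ|PROT_WRITE, MAP_SHARED, fd, pgnum) + pgoff);
 *
 *   By the ordering rule above, the first mmap yields the lock page and
 *   the second the unlock page.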
 *
 *	On a fork(), the child process will inherit the mappings for free, but
 *   will not inherit the parent's lock ownership, if any. The child should NOT
 *   do an explicit FREE on the lock context unless it did an explicit ALLOC.
 *	Only one process at a time is allowed to have a valid hat
 *   mapping to a lock page. This is enforced by this driver.
 *   A client acquires a lock by writing a '1' to the lock page.
 *   Note that it is not necessary to read and verify that the lock is '0'
 *	prior to writing a '1' in it.
 *   If it does not already have a valid mapping to that page, the driver
 *   takes a fault (devmap_access), loads the client mapping
 *   and allows the client to continue.  The client releases the lock by
 *   writing a '0' to the unlock page.  Again, if it does not have a valid
 *   mapping to the unlock page, the segment driver takes a fault,
 *   loads the mapping, and lets the client continue.  From this point
 *   forward, the client can make as many locks and unlocks as it
 *   wants, without any more faults into the kernel.
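 *
 *   In the uncontended case, acquire and release are thus plain memory
 *   writes (continuing the hypothetical lockp/unlockp example above):
 *
 *	*lockp = 1;		(acquire; may fault into the driver once)
 *	... draw to the frame buffer ...
 *	*unlockp = 0;		(release; may fault if mapping was revoked)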
 *
 *   If a different process wants to acquire a lock, it takes a page fault
 *   when it writes the '1' to the lock page.  If the segment driver sees
 *   that the lock page contained a zero, then it invalidates the owner's
 *   mappings and gives the mappings to this process.
 *
 *   If there is already a '1' in the lock page when the second client
 *   tries to access the lock page, then a lock exists.  The segment
 *   driver sleeps the second client and, if applicable, starts the
 *   timeout on the lock.  The owner's mapping to the unlock page
 *   is invalidated so that the driver will be woken again when the owner
 *   releases the lock.
 *
 *   When the locking client finally writes a '0' to the unlock page, the
 *   segment driver takes another fault.  The client is given a valid
 *   mapping, not to the unlock page, but to the "trash page", and allowed
 *   to continue.  Meanwhile, the sleeping client is given a valid mapping
 *   to the lock/unlock pages and allowed to continue as well.
 *
 * RFE: There is a leak if a process exits before freeing its allocated locks.
 * We currently do not track which locks were allocated by which
 * process, and we do not have a clean entry point into the driver
 * to do garbage collection. If the interface used a file descriptor for each
 * lock it allocs, then the driver could free up stuff in the _close routine.
 */

#include <sys/types.h>		/* various type defn's */
#include <sys/debug.h>
#include <sys/param.h>		/* various kernel limits */
#include <sys/time.h>
#include <sys/errno.h>
#include <sys/kmem.h>		/* defines kmem_alloc() */
#include <sys/conf.h>		/* defines cdevsw */
#include <sys/file.h>		/* various file modes, etc. */
#include <sys/uio.h>		/* UIO stuff */
#include <sys/ioctl.h>
#include <sys/cred.h>		/* defines cred struct */
#include <sys/mman.h>		/* defines mmap(2) parameters */
#include <sys/stat.h>		/* defines S_IFCHR */
#include <sys/cmn_err.h>	/* use cmn_err */
#include <sys/ddi.h>		/* ddi stuff */
#include <sys/sunddi.h>		/* ddi stuff */
#include <sys/ddi_impldefs.h>	/* ddi stuff */
#include <sys/winlockio.h>	/* defines ioctls, flags, data structs */

static int	winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int	winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
			size_t *, uint_t);
static int	winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
			uint_t, uint_t, uint_t, cred_t *);

static struct cb_ops	winlock_cb_ops = {
	nulldev,		/* open */
	nulldev,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	winlock_ioctl,		/* ioctl */
	winlock_devmap,		/* devmap */
	nodev,			/* mmap */
	winlocksegmap,		/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_NEW|D_MP|D_DEVMAP,	/* Driver compatibility flag */
	0,			/* rev */
	nodev,			/* aread */
	nodev			/* awrite */
};

static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);

static struct dev_ops	winlock_ops = {
	DEVO_REV,
	0,			/* refcount */
	winlock_info,		/* info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	winlock_attach,		/* attach */
	winlock_detach,		/* detach */
	nodev,			/* reset */
	&winlock_cb_ops,	/* driver ops */
	NULL,			/* bus ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};

static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
		void **);
static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
		devmap_cookie_t, void **, devmap_cookie_t, void **);
static int winlockmap_dup(devmap_cookie_t, void *,
		devmap_cookie_t, void **);
static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
		uint_t, uint_t);

static
struct devmap_callback_ctl winlockmap_ops = {
	DEVMAP_OPS_REV,
	winlockmap_map,
	winlockmap_access,
	winlockmap_dup,
	winlockmap_unmap,
};

#ifdef DEBUG
static	int	lock_debug = 0;
#define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif

/* Driver supports two styles of locks */
enum winlock_style { NEWSTYLE_LOCK, OLDSTYLE_LOCK };

/*
 * These structures describe a lock context.  We permit multiple
 * clients (not just two) to access a lock page
 *
 * The "cookie" identifies the lock context. It is the page number portion
 * of the sy_ident returned on lock allocation. The cookie is used in later
 * ioctls. "cookie" is lockid * PAGESIZE
 * "lockptr" is the kernel virtual address to the lock itself
 * The page offset portion of lockptr is the page offset portion of sy_ident
 */
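
/*
 * For example (illustrative, based on seglock_createlock below): the first
 * lock allocated gets lockid 1 and hence cookie 1 * PAGESIZE; a newstyle
 * lock's lock word lives in the shared lock page at index (lockid - 1),
 * so the second lock has cookie 2 * PAGESIZE, its lockptr points at byte
 * offset sizeof (int) within that page, and its sy_ident would be
 * 2 * PAGESIZE + sizeof (int).
 */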

/*
 * per-process information about locks.  This is the private field of
 * a devmap mapping.  Note that usually *two* mappings point to this.
 */

/*
 * Each process using winlock is associated with a segproc structure.
 * In various driver entry points, we need to search to find the right
 * segproc structure (if we were using file handles for each lock this
 * would not have been necessary).
 * It would have been simple to use the process pid (and ddi_get_pid).
 * However, during fork devmap_dup is called in the parent process context,
 * and using the pid complicates the code by introducing orphans.
 * Instead we use the as pointer for the process as a cookie,
 * which requires delving into various non-DDI kosher structs
 */
typedef struct segproc {
	struct segproc	*next;		/* next client of this lock */
	struct seglock	*lp;		/* associated lock context */
	devmap_cookie_t	lockseg;	/* lock mapping, if any */
	devmap_cookie_t unlockseg;	/* unlock mapping, if any */
	void		*tag;		/* process as pointer as tag */
	uint_t		flag;		/* see "flag bits" in winlockio.h */
} SegProc;

#define	ID(sdp)		((sdp)->tag)
#define	CURPROC_ID	(void *)(curproc->p_as)

/* per lock context information */

typedef struct seglock {
	struct seglock	*next;		/* next lock */
	uint_t		sleepers;	/* nthreads sleeping on this lock */
	uint_t		alloccount;	/* how many times created? */
	uint_t		cookie;		/* mmap() offset (page #) into device */
	uint_t		key;		/* key, if any */
	enum winlock_style	style;	/* style of lock - OLDSTYLE, NEWSTYLE */
	clock_t		timeout;	/* sleep time in ticks */
	ddi_umem_cookie_t umem_cookie;	/* cookie for umem allocated memory */
	int		*lockptr;	/* kernel virtual addr of lock */
	struct segproc	*clients;	/* list of clients of this lock */
	struct segproc	*owner;		/* current owner of lock */
	kmutex_t	mutex;		/* mutex for lock */
	kcondvar_t	locksleep;	/* for sleeping on lock */
} SegLock;

#define	LOCK(lp)	(*((lp)->lockptr))

/*
 * Number of locks that can fit in a page. The driver can support only that
 * many. For oldstyle locks, it is relatively easy to increase the limit as
 * each is in a separate page (MAX_LOCKS mostly serves to prevent runaway
 * allocation). For newstyle locks, this is trickier as the code needs to
 * allow for mapping into the second or third page of the cookie for some
 * locks.
 */
#define	MAX_LOCKS	(PAGESIZE/sizeof (int))
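
/*
 * For instance, with 8K pages and 4-byte ints (illustrative numbers only),
 * MAX_LOCKS works out to 8192 / 4 = 2048 locks sharing the one lock page.
 */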

#define	LOCKTIME	3	/* Default lock timeout in seconds */


/* Protections setting for winlock user mappings */
#define	WINLOCK_PROT	(PROT_READ|PROT_WRITE|PROT_USER)

/*
 * The trash page is where unwanted writes go
 * when a process is releasing a lock.
 */
static	ddi_umem_cookie_t trashpage_cookie = NULL;

/* For newstyle allocations a common page of locks is used */
static	caddr_t	lockpage = NULL;
static	ddi_umem_cookie_t lockpage_cookie = NULL;

static	dev_info_t	*winlock_dip = NULL;
static	kmutex_t	winlock_mutex;

/*
 * winlock_mutex protects
 *	lock_list
 *	lock_free_list
 *	"next" field in SegLock
 *	next_lock
 *	trashpage_cookie
 *	lockpage & lockpage_cookie
 *
 * SegLock_mutex protects
 *	rest of fields in SegLock
 *	All fields in list of SegProc (lp->clients)
 *
 * Lock ordering is winlock_mutex->SegLock_mutex
 * During devmap/seg operations SegLock_mutex acquired without winlock_mutex
 *
 * During devmap callbacks, the pointer to SegProc is stored as the private
 * data in the devmap handle. This pointer will not go stale (i.e., the
 * SegProc getting deleted) as the SegProc is not deleted until both the
 * lockseg and unlockseg have been unmapped and the pointers stored in
 * the devmap handles have been NULL'ed.
 * But before this pointer is used to access any fields (other than the 'lp')
 * lp->mutex must be held.
 */
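
/*
 * Note the drop-and-reacquire dance this ordering forces on code that holds
 * a SegLock mutex and then needs winlock_mutex: seglock_destroylock below
 * exits lp->mutex, enters winlock_mutex, and re-enters lp->mutex before
 * unlinking the lock from lock_list.
 */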

/*
 * The allocation code tries to allocate from lock_free_list
 * first, otherwise it uses kmem_zalloc.  When lock list is idle, all
 * locks in lock_free_list are kmem_freed
 */
static	SegLock	*lock_list = NULL;		/* in-use locks */
static	SegLock	*lock_free_list = NULL;		/* free locks */
static	int	next_lock = 0;			/* next lock cookie */

/* Routines to find a lock in lock_list based on offset or key */
static SegLock *seglock_findlock(uint_t);
static SegLock *seglock_findkey(uint_t);

/* Routines to find and allocate SegProc structures */
static SegProc *seglock_find_specific(SegLock *, void *);
static SegProc *seglock_alloc_specific(SegLock *, void *);
#define	seglock_findclient(lp)	seglock_find_specific((lp), CURPROC_ID)
#define	seglock_allocclient(lp)	seglock_alloc_specific((lp), CURPROC_ID)

/* Delete client from lock's client list */
static void seglock_deleteclient(SegLock *, SegProc *);
static void garbage_collect_lock(SegLock *, SegProc *);

/* Create a new lock */
static SegLock *seglock_createlock(enum winlock_style);
/* Destroy lock */
static void seglock_destroylock(SegLock *);
static void lock_destroyall(void);

/* Helper functions in winlockmap_access */
static int give_mapping(SegLock *, SegProc *, uint_t);
static int lock_giveup(SegLock *, int);
static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);

/* routines called from ioctl */
static int seglock_graballoc(intptr_t, enum winlock_style, int);
static int seglock_grabinfo(intptr_t, int);
static int seglock_grabfree(intptr_t, int);
static int seglock_gettimeout(intptr_t, int);
static int seglock_settimeout(intptr_t, int);
static void seglock_dump_all(void);

static	int
winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
	    (void *)devi, (int)cmd));
	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);
	if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
	    == DDI_FAILURE) {
		return (DDI_FAILURE);
	}
	winlock_dip = devi;
	ddi_report_dev(devi);
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static	int
winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
	    (void *)devi, (int)cmd));
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	mutex_enter(&winlock_mutex);
	if (lock_list != NULL) {
		mutex_exit(&winlock_mutex);
		return (DDI_FAILURE);
	}
	ASSERT(lock_free_list == NULL);

	DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
	/* destroy any common stuff created */
	if (trashpage_cookie != NULL) {
		ddi_umem_free(trashpage_cookie);
		trashpage_cookie = NULL;
	}
	if (lockpage != NULL) {
		ddi_umem_free(lockpage_cookie);
		lockpage = NULL;
		lockpage_cookie = NULL;
	}
	winlock_dip = NULL;
	mutex_exit(&winlock_mutex);
	return (DDI_SUCCESS);
}

/*ARGSUSED*/
static	int
winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	register int error;

	/* initialize result */
	*result = NULL;

	/* only valid instance (i.e., getminor) is 0 */
	if (getminor((dev_t)arg) >= 1)
		return (DDI_FAILURE);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (winlock_dip == NULL)
			error = DDI_FAILURE;
		else {
			*result = (void *)winlock_dip;
			error = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}


/*ARGSUSED*/
int
winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
	cred_t *cred, int *rval)
{
	DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
	    cmd, (void *)arg));

	switch (cmd) {
	/*
	 * ioctls that used to be handled by framebuffers (defined in fbio.h)
	 * RFE: No code really calls the GRAB* ioctls now. Should EOL.
	 */

	case GRABPAGEALLOC:
		return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
	case GRABPAGEFREE:
		return (seglock_grabfree(arg, mode));
	case GRABLOCKINFO:
		return (seglock_grabinfo(arg, mode));
	case GRABATTACH:
		return (EINVAL); /* GRABATTACH is not supported (never was) */

	case WINLOCKALLOC:
		return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
	case WINLOCKFREE:
		return (seglock_grabfree(arg, mode));
	case WINLOCKSETTIMEOUT:
		return (seglock_settimeout(arg, mode));
	case WINLOCKGETTIMEOUT:
		return (seglock_gettimeout(arg, mode));
	case WINLOCKDUMP:
		seglock_dump_all();
		return (0);

#ifdef DEBUG
	case (WIOC|255):
		lock_debug = arg;
		return (0);
#endif

	default:
		return (ENOTTY);		/* Why is this not EINVAL */
	}
}

int
winlocksegmap(
	dev_t	dev,		/* major:minor */
	off_t	off,		/* device offset from mmap(2) */
	struct as *as,		/* user's address space. */
	caddr_t	*addr,		/* address from mmap(2) */
	off_t	len,		/* length from mmap(2) */
	uint_t	prot,		/* user wants this access */
	uint_t	maxprot,	/* this is the maximum the user can have */
	uint_t	flags,		/* flags from mmap(2) */
	cred_t	*cred)
{
	DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));

	/* Only MAP_SHARED mappings are supported */
	if ((flags & MAP_TYPE) == MAP_PRIVATE) {
		return (EINVAL);
	}

	/* Use devmap_setup to setup the mapping */
	return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
	    maxprot, flags, cred));
}

/*ARGSUSED*/
int
winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
	SegLock *lp;
	int err;

	DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
	    off, len, (void *)dhp));

	*maplen = 0;

	/* Check if the lock exists, i.e., has been created by alloc */
	/* off is the sy_ident returned in the alloc ioctl */
	if ((lp = seglock_findlock((uint_t)off)) == NULL) {
		return (ENXIO);
	}

	/*
	 * The offset bits in the mmap(2) offset have to be the same as in
	 * lockptr, OR the offset should be 0 (i.e. masked off)
	 */
	if (((off & PAGEOFFSET) != 0) &&
	    ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
		DEBUGF(2, (CE_CONT,
		    "mmap offset %llx mismatch with lockptr %p\n",
		    off, (void *)lp->lockptr));
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/* Only supports PAGESIZE length mappings */
	if (len != PAGESIZE) {
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (EINVAL);
	}

	/*
	 * Set up devmap to point at page associated with lock.
	 * RFE: At this point we don't know if this is a lockpage or
	 * unlockpage; a lockpage would not need the DEVMAP_ALLOW_REMAP
	 * setting. We could have kept track of the mapping order here,
	 * but the devmap framework does not support storing any state in this
	 * devmap callback, as it does not call back for error cleanup if some
	 * other error happens in the framework.
	 * RFE: We should modify the winlock mmap interface so that the
	 * user process marks in the offset passed in whether this is for a
	 * lock or unlock mapping instead of guessing based on order of maps.
	 * This would clean up other things (such as in fork)
	 */
	if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
	    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
	    DEVMAP_ALLOW_REMAP, 0)) < 0) {
		mutex_exit(&lp->mutex);	/* held by seglock_findlock */
		return (err);
	}
	/*
	 * No mappings are loaded to those segments yet. The correctness
	 * of the winlock semantics depends on the devmap framework/seg_dev NOT
	 * loading the translations without calling the _access callback.
	 */

	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	*maplen = PAGESIZE;
	return (0);
}

/*
 * This routine is called by the devmap framework after the devmap entry point
 * above and the mapping is setup in seg_dev.
 * We store the pointer to the per-process context in the devmap private data.
 */
/*ARGSUSED*/
static int
winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
	size_t len, void **pvtp)
{
	SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
	SegProc *sdp;

	ASSERT(len == PAGESIZE);

	/* Find the per-process context for this lock, alloc one if not found */
	sdp = seglock_allocclient(lp);

	/*
	 * RFE: Determining which is a lock vs unlock seg is based on order
	 * of mmaps, we should change that to be derivable from off
	 */
	if (sdp->lockseg == NULL) {
		sdp->lockseg = dhp;
	} else if (sdp->unlockseg == NULL) {
		sdp->unlockseg = dhp;
	} else {
		/* attempting to map lock more than twice */
		mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
		return (ENOMEM);
	}

	*pvtp = sdp;
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	return (DDI_SUCCESS);
}

/*
 * duplicate a segment, as in fork()
 * On fork, the child inherits the mappings to the lock
 *	lp->alloccount is NOT incremented, so child should not do a free().
 *	Semantics same as if done an alloc(), map(), map().
 *	This way it would work fine if doing an exec() variant later
 *	Child does not inherit any UFLAGS set in parent
 * The lock and unlock pages are started off unmapped, i.e., child does not
 *	own the lock.
 * The code assumes that the child process has a valid pid at this point
 * RFE: These semantics depend on fork not duplicating the hat mappings
 *	(which is the current implementation). To enforce it would need to
 *	call devmap_unload from here - not clear if that is allowed.
 */

static int
winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
	void **newpvt)
{
	SegProc *sdp = (SegProc *)oldpvt;
	SegProc *ndp;
	SegLock *lp = sdp->lp;

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/*
	 * Note: At this point, the child process does have a pid, but
	 * the arguments passed to as_dup and hence to devmap_dup don't pass
	 * it down. So we cannot use normal seglock_findclient - which finds
	 * the parent sdp itself!
	 * Instead we allocate the child's SegProc by using the child as
	 * pointer.
	 * RFE: we are using the as structure which means peeking into the
	 * devmap_cookie. This is not DDI-compliant. Need a compliant way of
	 * getting at either the as or, better, a way to get the child's new
	 * pid
	 */
	ndp = seglock_alloc_specific(lp,
	    (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
	ASSERT(ndp != sdp);

	if (sdp->lockseg == dhp) {
		ASSERT(ndp->lockseg == NULL);
		ndp->lockseg = new_dhp;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		ASSERT(ndp->unlockseg == NULL);
		ndp->unlockseg = new_dhp;
		if (sdp->flag & TRASHPAGE) {
			ndp->flag |= TRASHPAGE;
		}
	}
	mutex_exit(&lp->mutex);
	*newpvt = (void *)ndp;
	return (0);
}


/*ARGSUSED*/
static void
winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
	devmap_cookie_t new_dhp1, void **newpvtp1,
	devmap_cookie_t new_dhp2, void **newpvtp2)
{
	SegProc	*sdp = (SegProc *)pvtp;
	SegLock	*lp = sdp->lp;

	/*
	 * We always create PAGESIZE length mappings, so there should never
	 * be a partial unmapping case
	 */
	ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
	/* make sure this process doesn't own the lock */
	if (sdp == lp->owner) {
		/*
		 * Not handling errors - i.e., errors in unloading mapping
		 * As part of unmapping hat/seg structure get torn down anyway
		 */
		(void) lock_giveup(lp, 0);
	}

	ASSERT(sdp != lp->owner);
	if (sdp->lockseg == dhp) {
		sdp->lockseg = NULL;
	} else {
		ASSERT(sdp->unlockseg == dhp);
		sdp->unlockseg = NULL;
		sdp->flag &= ~TRASHPAGE;	/* clear flag if set */
	}

	garbage_collect_lock(lp, sdp);
}

/*ARGSUSED*/
static int
winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
	uint_t type, uint_t rw)
{
	SegProc *sdp = (SegProc *)pvt;
	SegLock *lp = sdp->lp;
	int err;

	/* Driver handles only DEVMAP_ACCESS type of faults */
	if (type != DEVMAP_ACCESS)
		return (-1);

	mutex_enter(&lp->mutex);
	ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));

	/* should be using a SegProc that corresponds to current process */
	ASSERT(ID(sdp) == CURPROC_ID);

	/*
	 * If process is faulting but does not have both segments mapped
	 * return error (should cause a segv).
	 * RFE: could give it a permanent trashpage
	 */
	if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
		err = -1;
	} else {
		err = seglock_lockfault(dhp, sdp, lp, rw);
	}
	mutex_exit(&lp->mutex);
	return (err);
}

	/* INTERNAL ROUTINES START HERE */



/*
 * search the lock_list list for the specified cookie
 * The cookie is the sy_ident field returned by the ALLOC ioctl.
 * This has two parts:
 * the pageoffset bits contain the offset into the lock page.
 * the pagenumber bits contain the lock id.
 * The user code is supposed to pass in only the pagenumber portion
 *	(i.e. mask off the pageoffset bits). However the code below
 *	does the mask in case the users are not diligent.
 * if found, returns with the mutex for the SegLock structure held
 */
static SegLock *
seglock_findlock(uint_t cookie)
{
	SegLock	*lp;

	cookie &= (uint_t)PAGEMASK;   /* remove pageoffset bits to get cookie */
	mutex_enter(&winlock_mutex);
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		if (cookie == lp->cookie) {
			break;	/* return with lp->mutex held */
		}
		mutex_exit(&lp->mutex);
	}
	mutex_exit(&winlock_mutex);
	return (lp);
}

/*
 * search the lock_list list for the specified non-zero key
 * if found, returns with the mutex for the SegLock structure held
 */
static SegLock *
seglock_findkey(uint_t key)
{
	SegLock	*lp;

	ASSERT(MUTEX_HELD(&winlock_mutex));
	/* The driver allows multiple locks with key 0, don't search */
	if (key == 0)
		return (NULL);
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		if (key == lp->key)
			break;
		mutex_exit(&lp->mutex);
	}
	return (lp);
}

/*
 * Create a new lock context.
 * Returns with SegLock mutex held
 */

static SegLock *
seglock_createlock(enum winlock_style style)
{
	SegLock	*lp;

	DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
	    (void *)lock_free_list, next_lock));

	ASSERT(MUTEX_HELD(&winlock_mutex));
	if (lock_free_list != NULL) {
		lp = lock_free_list;
		lock_free_list = lp->next;
	} else if (next_lock >= MAX_LOCKS) {
		return (NULL);
	} else {
		lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
		lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
		mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
		cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
		++next_lock;
	}

	mutex_enter(&lp->mutex);
	ASSERT((lp->cookie/PAGESIZE) <= next_lock);

	if (style == OLDSTYLE_LOCK) {
		lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
		    DDI_UMEM_SLEEP, &(lp->umem_cookie));
	} else {
		lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
		lp->umem_cookie = lockpage_cookie;
	}

	ASSERT(lp->lockptr != NULL);
	lp->style = style;
	lp->sleepers = 0;
	lp->alloccount = 1;
	lp->timeout = LOCKTIME*hz;
	lp->clients = NULL;
	lp->owner = NULL;
	LOCK(lp) = 0;
	lp->next = lock_list;
	lock_list = lp;
	return (lp);
}

/*
 * Routine to destroy a lock structure.
 * This routine is called while holding the lp->mutex but not the
 * winlock_mutex.
 */

static void
seglock_destroylock(SegLock *lp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!MUTEX_HELD(&winlock_mutex));

	DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
	    lp->cookie, lp->key));

	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* clean up/release fields in lp */
	if (lp->style == OLDSTYLE_LOCK) {
		ddi_umem_free(lp->umem_cookie);
	}
	lp->umem_cookie = NULL;
	lp->lockptr = NULL;
	lp->key = 0;

	/*
	 * Reduce cookie by 1, which makes it non page-aligned and invalid.
	 * This prevents any valid lookup from finding this lock,
	 * so when we drop the lock and regrab it, it will still
	 * be there and nobody else will have attached to it
	 */
	lp->cookie--;

	/* Drop and reacquire mutexes in right order */
	mutex_exit(&lp->mutex);
	mutex_enter(&winlock_mutex);
	mutex_enter(&lp->mutex);

	/* reincrement the cookie to get the original valid cookie */
	lp->cookie++;
	ASSERT((lp->cookie & PAGEOFFSET) == 0);
	ASSERT(lp->alloccount == 0);
	ASSERT(lp->clients == NULL);
	ASSERT(lp->owner == NULL);
	ASSERT(lp->sleepers == 0);

	/* Remove lp from lock_list */
	if (lock_list == lp) {
		lock_list = lp->next;
	} else {
		SegLock *tmp = lock_list;
		while (tmp->next != lp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = lp->next;
	}

	/* Add to lock_free_list */
	lp->next = lock_free_list;
	lock_free_list = lp;
	mutex_exit(&lp->mutex);

	/* Check if all locks deleted and cleanup */
	if (lock_list == NULL) {
		lock_destroyall();
	}

	mutex_exit(&winlock_mutex);
}

/* Routine to find a SegProc corresponding to the tag */

static SegProc *
seglock_find_specific(SegLock *lp, void *tag)
{
	SegProc *sdp;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(tag != NULL);
	for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
		if (ID(sdp) == tag)
			break;
	}
	return (sdp);
}

/* Routine to find (and if needed allocate) a SegProc corresponding to tag */

static SegProc *
seglock_alloc_specific(SegLock *lp, void *tag)
{
	SegProc *sdp;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(tag != NULL);

	/* Search and return if existing one found */
	sdp = seglock_find_specific(lp, tag);
	if (sdp != NULL)
		return (sdp);

	DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
	    tag, lp->cookie));

	/* Allocate a new SegProc */
	sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
	sdp->next = lp->clients;
	lp->clients = sdp;
	sdp->lp = lp;
	ID(sdp) = tag;
	return (sdp);
}

/*
 * search a context's client list for the given client and delete
 */

static void
seglock_deleteclient(SegLock *lp, SegProc *sdp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(lp->owner != sdp);	/* Not current owner of lock */
	ASSERT(sdp->lockseg == NULL);	/* Mappings torn down */
	ASSERT(sdp->unlockseg == NULL);

	DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
	    ddi_get_pid(), lp->cookie));
	if (lp->clients == sdp) {
		lp->clients = sdp->next;
	} else {
		SegProc *tmp = lp->clients;
		while (tmp->next != sdp) {
			tmp = tmp->next;
			ASSERT(tmp != NULL);
		}
		tmp->next = sdp->next;
	}
	kmem_free(sdp, sizeof (SegProc));
}

/*
 * Routine to check whether the SegProc and SegLock
 * structures are empty/idle, and destroy them if they are ready.
 * Can be called with sdp == NULL if the caller wants to verify only the
 * lock state.
 * The caller should hold lp->mutex,
 * and this routine drops the mutex.
 */
static void
garbage_collect_lock(SegLock *lp, SegProc *sdp)
{
	ASSERT(MUTEX_HELD(&lp->mutex));
	/* see if both segments unmapped from client structure */
	if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
		seglock_deleteclient(lp, sdp);

	/* see if this is last client in the entire lock context */
	if ((lp->clients == NULL) && (lp->alloccount == 0)) {
		seglock_destroylock(lp);
	} else {
		mutex_exit(&lp->mutex);
	}
}


/* IOCTLS START HERE */

static int
seglock_grabinfo(intptr_t arg, int mode)
{
	int i = 1;

	/* multiple clients per lock supported - see comments up top */
	if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
		return (EFAULT);
	return (0);
}

static int
seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
{
	struct seglock	*lp;
	uint_t		key;
	struct		winlockalloc wla;
	int		err;

	if (style == OLDSTYLE_LOCK) {
		key = 0;
	} else {
		if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
		    mode)) {
			return (EFAULT);
		}
		key = wla.sy_key;
	}

	DEBUGF(3, (CE_CONT,
	    "seglock_graballoc: key=%u, style=%d\n", key, style));

	mutex_enter(&winlock_mutex);
	/* Allocate lockpage on first new style alloc */
	if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
		lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
		    &lockpage_cookie);
	}

	/* Allocate trashpage on first alloc (any style) */
	if (trashpage_cookie == NULL) {
		(void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
		    &trashpage_cookie);
	}

	if ((lp = seglock_findkey(key)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
		    key, lp->cookie));
		++lp->alloccount;
	} else if ((lp = seglock_createlock(style)) != NULL) {
		DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
		    key, lp->cookie));
		lp->key = key;
	} else {
		DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
		mutex_exit(&winlock_mutex);
		return (ENOMEM);
	}
	ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));

	mutex_exit(&winlock_mutex);

	if (style == OLDSTYLE_LOCK) {
		err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
		    sizeof (lp->cookie), mode);
	} else {
		wla.sy_ident = lp->cookie +
		    (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
		err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
		    sizeof (wla), mode);
	}

	if (err) {
		/* On error, should undo allocation */
		lp->alloccount--;

		/* Verify and delete if lock is unused now */
		garbage_collect_lock(lp, NULL);
		return (EFAULT);
	}

	mutex_exit(&lp->mutex);
	return (0);
}

static int
seglock_grabfree(intptr_t arg, int mode)	/* IOCTL */
{
	struct seglock	*lp;
	uint_t	offset;

	if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
	    != 0) {
		return (EFAULT);
	}
	DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));

	if ((lp = seglock_findlock(offset)) == NULL) {
		DEBUGF(2, (CE_CONT, "did not find lock\n"));
		return (EINVAL);
	}
	DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
	    lp->key, lp->cookie, lp->alloccount));

	if (lp->alloccount > 0)
		lp->alloccount--;

	/* Verify and delete if lock is unused now */
	garbage_collect_lock(lp, NULL);
	return (0);
}


/*
 * Sets timeout in lock and UFLAGS in client
 *	the UFLAGS are stored in the client structure and persist only
 *	until the unmap of the lock pages. If the process sets UFLAGS,
 *	does a map of the lock/unlock pages and unmaps them, the client
 *	structure will get deleted and the UFLAGS will be lost. The process
 *	will need to set up the flags again.
 */
static int
seglock_settimeout(intptr_t arg, int mode)	/* IOCTL */
{
	SegLock		*lp;
	SegProc		*sdp;
	struct winlocktimeout		wlt;

	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
		return (EFAULT);
	}

	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
		return (EINVAL);

	lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
	/* if timeout modified, wake up any sleepers */
	if (lp->sleepers > 0) {
		cv_broadcast(&lp->locksleep);
	}

	/*
	 * If the process is trying to set UFLAGS,
	 *	Find the client segproc and allocate one if needed
	 *	Set the flags preserving the kernel flags
	 * If the process is clearing UFLAGS
	 *	Find the client segproc but don't allocate one if it does
	 *	not exist
	 */
	if (wlt.sy_flags & UFLAGS) {
		sdp = seglock_allocclient(lp);
		sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
	} else if ((sdp = seglock_findclient(lp)) != NULL) {
		sdp->flag = sdp->flag & KFLAGS;
		/* If clearing UFLAGS leaves the segment or lock idle, delete */
		garbage_collect_lock(lp, sdp);
		return (0);
	}
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */
	return (0);
}

static int
seglock_gettimeout(intptr_t arg, int mode)
{
	SegLock		*lp;
	SegProc		*sdp;
	struct winlocktimeout		wlt;

	if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
		return (EFAULT);

	if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
		return (EINVAL);

	wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
	/*
	 * If this process has an active allocated lock, return those flags.
	 *	Don't allocate a client structure on gettimeout.
	 * If not, return 0.
	 */
	if ((sdp = seglock_findclient(lp)) != NULL) {
		wlt.sy_flags = sdp->flag & UFLAGS;
	} else {
		wlt.sy_flags = 0;
	}
	mutex_exit(&lp->mutex);	/* mutex held by seglock_findlock */

	if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Handle lock segment faults here...
 *
 * This is where the magic happens.
 */

/* ARGSUSED */
static	int
seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
{
	SegProc *owner = lp->owner;
	int err;

	ASSERT(MUTEX_HELD(&lp->mutex));
	DEBUGF(3, (CE_CONT,
	    "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
	    (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));

	/* lockfault is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* If Lock has no current owner, give the mapping to new owner */
	if (owner == NULL) {
		DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
		return (give_mapping(lp, sdp, rw));
	}

	if (owner == sdp) {
		/*
		 * Current owner is faulting on owned lock segment OR
		 * Current owner is faulting on unlock page and has no waiters
		 * Then can give the mapping to current owner
		 */
		if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
			DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
			return (give_mapping(lp, sdp, rw));
		} else {
			/*
			 * Owner must be writing to the unlock page and there
			 * are waiters; other cases have been checked earlier.
			 * Release the lock, owner, and owner's mappings.
			 * As the owner is trying to write to the unlock page,
			 * leave it with a trashpage mapping and wake up the
			 * sleepers
			 */
			ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
			DEBUGF(4, (CE_CONT,
			    " owner fault on unlock seg w/ sleeper\n"));
			return (lock_giveup(lp, 1));
		}
	}

	ASSERT(owner != sdp);

	/*
	 * If old owner is faulting on a trash unlock mapping,
	 * load hat mappings to the trash page.
	 * RFE: non-owners should NOT be faulting on the unlock mapping, as
	 * they are supposed to fault on the lock seg first. We could give
	 * them a trash page or return an error.
	 */
	if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
		DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
		return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, rw));
	}

	/*
	 * Non-owner faulting. Need to check current LOCK state.
	 *
	 * Before reading the lock value in LOCK(lp), we must make sure that
	 * the owner cannot change its value before we change mappings,
	 * or else we could end up either with a hung process
	 * or more than one process thinking they have the lock.
	 * We do that by unloading the owner's mappings
	 */
	DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
	err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
	err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	if (err != 0)
		return (err);	/* unable to remove owner mapping */

	/*
	 * If lock is not held, then current owner mappings were
	 * unloaded above and we can give the lock to the new owner
	 */
	if (LOCK(lp) == 0) {
		DEBUGF(4, (CE_CONT,
		    "Free lock (%p): Giving mapping to new owner %d\n",
		    (void *)lp, ddi_get_pid()));
		return (give_mapping(lp, sdp, rw));
	}

	DEBUGF(4, (CE_CONT, "  lock held, sleeping\n"));

	/*
	 * A non-owning process tried to write (presumably to the lockpage,
	 * but it doesn't matter) but the lock is held; we need to sleep for
	 * the lock while there is an owner.
	 */

	lp->sleepers++;
	while ((owner = lp->owner) != NULL) {
		int rval;

		if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
			/*
			 * No timeout has been specified for this lock;
			 * we'll simply sleep on the condition variable.
			 */
			rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
		} else {
			/*
			 * A timeout _has_ been specified for this lock. We need
			 * to wake up and possibly steal this lock if the owner
			 * does not let it go. Note that all sleepers on a lock
			 * with a timeout wait; the sleeper with the earliest
			 * timeout will wake up, and potentially steal the lock.
			 * Stealing the lock will cause a broadcast on the
			 * locksleep cv and thus kick the other timed waiters
			 * and cause everyone to restart in a new timedwait
			 */
			rval = cv_timedwait_sig(&lp->locksleep,
			    &lp->mutex, ddi_get_lbolt() + lp->timeout);
		}

		/*
		 * Timeout and still old owner - steal lock
		 * Force-Release lock and give old owner a trashpage mapping
		 */
		if ((rval == -1) && (lp->owner == owner)) {
			/*
			 * if any errors in lock_giveup, go back and sleep/retry
			 * If successful, will break out of loop
			 */
			cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			(void) lock_giveup(lp, 1);
		} else if (rval == 0) { /* signal pending */
			cmn_err(CE_NOTE,
			    "Process %d signalled while waiting on lock %d\n",
			    ddi_get_pid(), lp->cookie);
			lp->sleepers--;
			return (FC_MAKE_ERR(EINTR));
		}
	}

	lp->sleepers--;
	/*
	 * Give mapping to this process and save a fault later
	 */
	return (give_mapping(lp, sdp, rw));
}

/*
 * Utility: give a valid mapping to lock and unlock pages to current process.
 * Caller responsible for unloading old owner's mappings
 */

static int
give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
{
	int err = 0;

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
	/* give_mapping is always called with sdp in current process context */
	ASSERT(ID(sdp) == CURPROC_ID);

	/* remap any old trash mappings */
	if (sdp->flag & TRASHPAGE) {
		/* current owner should not have a trash mapping */
		ASSERT(sdp != lp->owner);

		DEBUGF(4, (CE_CONT,
		    "new owner %d remapping old trash mapping\n",
		    ddi_get_pid()));
		if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
		    lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/*
			 * unable to remap old trash page,
			 * abort before changing owner
			 */
			DEBUGF(4, (CE_CONT,
			    "aborting: error in umem_remap %d\n", err));
			return (err);
		}
		sdp->flag &= ~TRASHPAGE;
	}

	/* we have a new owner now */
	lp->owner = sdp;

	if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
	    DEVMAP_ACCESS, rw)) != 0) {
		return (err);
	}
	DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));

	if (lp->sleepers) {
		/* Force unload unlock mapping if there are waiters */
		DEBUGF(4, (CE_CONT,
		    " lock has %d sleepers => remove unlock mapping\n",
		    lp->sleepers));
		err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
	} else {
		/*
		 * while here, give new owner a valid mapping to unlock
		 * page so we don't get called again.
		 */
		DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
		err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
		    DEVMAP_ACCESS, PROT_WRITE);
	}
	return (err);
}

/*
 * Unload owner's mappings, release the lock and wakeup any sleepers
 * If trash, then the old owner is given a trash mapping
 *	=> old owner held lock too long and caused a timeout
 */
static int
lock_giveup(SegLock *lp, int trash)
{
	SegProc *owner = lp->owner;

	DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
	    (void *)lp, (void *)ID(lp->owner), trash));

	ASSERT(MUTEX_HELD(&lp->mutex));
	ASSERT(owner != NULL);

	/*
	 * owner loses lockpage/unlockpage mappings and gains a
	 * trashpage mapping, if needed.
	 */
	if (!trash) {
		/*
		 * We do not handle errors in devmap_unload in the !trash case,
		 * as the process is attempting to unmap/exit or otherwise
		 * release the lock. Errors in unloading the mapping are not
		 * going to affect that (unmap does not take error return).
		 */
		(void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
		(void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
	} else {
		int err;

		if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
			/* error unloading lockseg mapping. abort giveup */
			return (err);
		}

		/*
		 * old owner gets mapping to trash page so it can continue
		 * devmap_umem_remap does a hat_unload (and does it holding
		 * the right locks), so no need to devmap_unload on unlockseg
		 */
		if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
		    trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
			/* error remapping to trash page, abort giveup */
			return (err);
		}
		owner->flag |= TRASHPAGE;
		/*
		 * Preload mapping to trash page by calling devmap_load
		 * However, devmap_load can only be called on the faulting
		 * process context and not on the owner's process context
		 * we preload only if we happen to be in owner process context
		 * Other processes will fault on the unlock mapping
		 * and be given a trash mapping at that time.
		 */
		if (ID(owner) == CURPROC_ID) {
			(void) devmap_load(owner->unlockseg, lp->cookie,
			    PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
		}
	}

	lp->owner = NULL;

	/* Clear the lock value in underlying page so new owner can grab it */
	LOCK(lp) = 0;

	if (lp->sleepers) {
		DEBUGF(4, (CE_CONT, "  waking up, lp=%p\n", (void *)lp));
		cv_broadcast(&lp->locksleep);
	}
	return (0);
}

/*
 * destroy all allocated memory.
 */

static void
lock_destroyall(void)
{
	SegLock	*lp, *lpnext;

	ASSERT(MUTEX_HELD(&winlock_mutex));
	ASSERT(lock_list == NULL);

	DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
	for (lp = lock_free_list; lp != NULL; lp = lpnext) {
		mutex_enter(&lp->mutex);
		lpnext = lp->next;
		ASSERT(lp->clients == NULL);
		ASSERT(lp->owner == NULL);
		ASSERT(lp->alloccount == 0);
		mutex_destroy(&lp->mutex);
		cv_destroy(&lp->locksleep);
		kmem_free(lp, sizeof (SegLock));
	}
	lock_free_list = NULL;
	next_lock = 0;
}


/* RFE: create mdb walkers instead of dump routines? */
static void
seglock_dump_all(void)
{
	SegLock	*lp;

	mutex_enter(&winlock_mutex);
	cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");

	cmn_err(CE_CONT, "Lock List:\n");
	for (lp = lock_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
		    lp->cookie, lp->key, lp->alloccount,
		    lp->clients ? 'Y' : 'N',
		    lp->owner ? 'Y' : 'N',
		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
		    lp->sleepers);
		mutex_exit(&lp->mutex);
	}
	cmn_err(CE_CONT, "Free Lock List:\n");
	for (lp = lock_free_list; lp != NULL; lp = lp->next) {
		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
		    lp->cookie, lp->key, lp->alloccount,
		    lp->clients ? 'Y' : 'N',
		    lp->owner ? 'Y' : 'N',
		    lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
		    lp->sleepers);
		mutex_exit(&lp->mutex);
	}

#ifdef DEBUG
	if (lock_debug < 3) {
		mutex_exit(&winlock_mutex);
		return;
	}

	for (lp = lock_list; lp != NULL; lp = lp->next) {
		SegProc	*sdp;

		mutex_enter(&lp->mutex);
		cmn_err(CE_CONT,
		    "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
		    (void *)lp, lp->key, lp->cookie, lp->alloccount,
		    lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);

		cmn_err(CE_CONT,
		    "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
		    lp->style, (void *)lp->lockptr, lp->timeout,
		    (void *)lp->clients, (void *)lp->owner);


		for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
			cmn_err(CE_CONT, "  client %p%s, lp=%p, flag=%x, "
			    "process tag=%p, lockseg=%p, unlockseg=%p\n",
			    (void *)sdp, sdp == lp->owner ? " (owner)" : "",
			    (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
			    (void *)sdp->lockseg, (void *)sdp->unlockseg);
		}
		mutex_exit(&lp->mutex);
	}
#endif
	mutex_exit(&winlock_mutex);
}

#include <sys/modctl.h>

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"Winlock Driver",	/* Name of the module */
	&winlock_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	0,
	0,
	0
};

int
_init(void)
{
	int e;

	mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
	e = mod_install(&modlinkage);
	if (e) {
		mutex_destroy(&winlock_mutex);
	}
	return (e);
}


int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	e;

	e = mod_remove(&modlinkage);
	if (e == 0) {
		mutex_destroy(&winlock_mutex);
	}
	return (e);
}