/*-
 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
 * Authors: Doug Rabson <dfr@rabson.org>
 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/kern/kern_lockf.c 313728 2017-02-14 13:45:20Z avg $");

#include "opt_debug_lockf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/hash.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/taskqueue.h>

#ifdef LOCKF_DEBUG
#include <sys/sysctl.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>

static int	lockf_debug = 0; /* control debug output */
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
#endif

static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

struct owner_edge;
struct owner_vertex;
struct owner_vertex_list;
struct owner_graph;

#define NOLOCKF (struct lockf_entry *)0
#define SELF	0x1
#define OTHERS	0x2
static void	 lf_init(void *);
static int	 lf_hash_owner(caddr_t, struct flock *, int);
static int	 lf_owner_matches(struct lock_owner *, caddr_t, struct flock *,
    int);
static struct lockf_entry *
		 lf_alloc_lock(struct lock_owner *);
static int	 lf_free_lock(struct lockf_entry *);
static int	 lf_clearlock(struct lockf *, struct lockf_entry *);
static int	 lf_overlaps(struct lockf_entry *, struct lockf_entry *);
static int	 lf_blocks(struct lockf_entry *, struct lockf_entry *);
static void	 lf_free_edge(struct lockf_edge *);
static struct lockf_edge *
		 lf_alloc_edge(void);
static void	 lf_alloc_vertex(struct lockf_entry *);
static int	 lf_add_edge(struct lockf_entry *, struct lockf_entry *);
static void	 lf_remove_edge(struct lockf_edge *);
static void	 lf_remove_outgoing(struct lockf_entry *);
static void	 lf_remove_incoming(struct lockf_entry *);
static int	 lf_add_outgoing(struct lockf *, struct lockf_entry *);
static int	 lf_add_incoming(struct lockf *, struct lockf_entry *);
static int	 lf_findoverlap(struct lockf_entry **, struct lockf_entry *,
    int);
static struct lockf_entry *
		 lf_getblock(struct lockf *, struct lockf_entry *);
static int	 lf_getlock(struct lockf *, struct lockf_entry *, struct flock *);
static void	 lf_insert_lock(struct lockf *, struct lockf_entry *);
static void	 lf_wakeup_lock(struct lockf *, struct lockf_entry *);
static void	 lf_update_dependancies(struct lockf *, struct lockf_entry *,
    int all, struct lockf_entry_list *);
static void	 lf_set_start(struct lockf *, struct lockf_entry *, off_t,
	struct lockf_entry_list*);
static void	 lf_set_end(struct lockf *, struct lockf_entry *, off_t,
	struct lockf_entry_list*);
static int	 lf_setlock(struct lockf *, struct lockf_entry *,
    struct vnode *, void **cookiep);
static int	 lf_cancel(struct lockf *, struct lockf_entry *, void *);
static void	 lf_split(struct lockf *, struct lockf_entry *,
    struct lockf_entry *, struct lockf_entry_list *);
#ifdef LOCKF_DEBUG
static int	 graph_reaches(struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *path);
static void	 graph_check(struct owner_graph *g, int checkorder);
static void	 graph_print_vertices(struct owner_vertex_list *set);
#endif
static int	 graph_delta_forward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int	 graph_delta_backward(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y,
    struct owner_vertex_list *delta);
static int	 graph_add_indices(int *indices, int n,
    struct owner_vertex_list *set);
static int	 graph_assign_indices(struct owner_graph *g, int *indices,
    int nextunused, struct owner_vertex_list *set);
static int	 graph_add_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static void	 graph_remove_edge(struct owner_graph *g,
    struct owner_vertex *x, struct owner_vertex *y);
static struct owner_vertex *graph_alloc_vertex(struct owner_graph *g,
    struct lock_owner *lo);
static void	 graph_free_vertex(struct owner_graph *g,
    struct owner_vertex *v);
static struct owner_graph * graph_init(struct owner_graph *g);
#ifdef LOCKF_DEBUG
static void	 lf_print(char *, struct lockf_entry *);
static void	 lf_printlist(char *, struct lockf_entry *);
static void	 lf_print_owner(struct lock_owner *);
#endif

/*
 * This structure is used to keep track of both local and remote lock
 * owners. The lf_owner field of the struct lockf_entry points back at
 * the lock owner structure. Each possible lock owner (local proc for
 * POSIX fcntl locks, local file for BSD flock locks or <pid,sysid>
 * pair for remote locks) is represented by a unique instance of
 * struct lock_owner.
 *
 * If a lock owner has a lock that blocks some other lock or a lock
 * that is waiting for some other lock, it also has a vertex in the
 * owner_graph below.
 *
 * Locks:
 * (s)		locked by state->ls_lock
 * (S)		locked by lf_lock_states_lock
 * (l)		locked by lf_lock_owners_lock
 * (g)		locked by lf_owner_graph_lock
 * (c)		const until freeing
 */
#define	LOCK_OWNER_HASH_SIZE	256

struct lock_owner {
	LIST_ENTRY(lock_owner) lo_link; /* (l) hash chain */
	int	lo_refs;	    /* (l) Number of locks referring to this */
	int	lo_flags;	    /* (c) Flags passed to lf_advlock */
	caddr_t	lo_id;		    /* (c) Id value passed to lf_advlock */
	pid_t	lo_pid;		    /* (c) Process Id of the lock owner */
	int	lo_sysid;	    /* (c) System Id of the lock owner */
	struct owner_vertex *lo_vertex; /* (g) entry in deadlock graph */
};

LIST_HEAD(lock_owner_list, lock_owner);

static struct sx		lf_lock_states_lock;
static struct lockf_list	lf_lock_states; /* (S) */
static struct sx		lf_lock_owners_lock;
static struct lock_owner_list	lf_lock_owners[LOCK_OWNER_HASH_SIZE]; /* (l) */

/*
 * Structures for deadlock detection.
 *
 * We have two types of directed graph, the first is the set of locks,
 * both active and pending on a vnode. Within this graph, active locks
 * are terminal nodes in the graph (i.e. have no out-going
 * edges). Pending locks have out-going edges to each blocking active
 * lock that prevents the lock from being granted and also to each
 * older pending lock that would block them if it was active. The
 * graph for each vnode is naturally acyclic; new edges are only ever
 * added to or from new nodes (either new pending locks which only add
 * out-going edges or new active locks which only add in-coming edges)
 * therefore they cannot create loops in the lock graph.
 *
 * The second graph is a global graph of lock owners. Each lock owner
 * is a vertex in that graph and an edge is added to the graph
 * whenever an edge is added to a vnode graph, with end points
 * corresponding to owner of the new pending lock and the owner of the
 * lock upon which it waits. In order to prevent deadlock, we only add
 * an edge to this graph if the new edge would not create a cycle.
 *
 * The lock owner graph is topologically sorted, i.e. if a node has
 * any outgoing edges, then it has an order strictly less than any
 * node to which it has an outgoing edge. We preserve this ordering
 * (and detect cycles) on edge insertion using Algorithm PK from the
 * paper "A Dynamic Topological Sort Algorithm for Directed Acyclic
 * Graphs" (ACM Journal of Experimental Algorithms, Vol 11, Article
 * No. 1.7)
 */
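
/*
 * For example, if owner A is blocked waiting for a lock held by owner
 * B, the edge A -> B is recorded and the topological order guarantees
 * v_order(A) < v_order(B).  A later request that would add the edge
 * B -> A cannot be ordered consistently (it would also require
 * v_order(B) < v_order(A)), so graph_add_edge() rejects it and the
 * caller fails the lock request with EDEADLK.
 */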
struct owner_vertex;

struct owner_edge {
	LIST_ENTRY(owner_edge) e_outlink; /* (g) link from's out-edge list */
	LIST_ENTRY(owner_edge) e_inlink;  /* (g) link to's in-edge list */
	int		e_refs;		  /* (g) number of times added */
	struct owner_vertex *e_from;	  /* (c) out-going from here */
	struct owner_vertex *e_to;	  /* (c) in-coming to here */
};
LIST_HEAD(owner_edge_list, owner_edge);

struct owner_vertex {
	TAILQ_ENTRY(owner_vertex) v_link; /* (g) workspace for edge insertion */
	uint32_t	v_gen;		  /* (g) workspace for edge insertion */
	int		v_order;	  /* (g) order of vertex in graph */
	struct owner_edge_list v_outedges;/* (g) list of out-edges */
	struct owner_edge_list v_inedges; /* (g) list of in-edges */
	struct lock_owner *v_owner;	  /* (c) corresponding lock owner */
};
TAILQ_HEAD(owner_vertex_list, owner_vertex);

struct owner_graph {
	struct owner_vertex** g_vertices; /* (g) pointers to vertices */
	int		g_size;		  /* (g) number of vertices */
	int		g_space;	  /* (g) space allocated for vertices */
	int		*g_indexbuf;	  /* (g) workspace for loop detection */
	uint32_t	g_gen;		  /* (g) increment when re-ordering */
};

static struct sx		lf_owner_graph_lock;
static struct owner_graph	lf_owner_graph;

/*
 * Initialise various structures and locks.
 */
static void
lf_init(void *dummy)
{
	int i;

	sx_init(&lf_lock_states_lock, "lock states lock");
	LIST_INIT(&lf_lock_states);

	sx_init(&lf_lock_owners_lock, "lock owners lock");
	for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
		LIST_INIT(&lf_lock_owners[i]);

	sx_init(&lf_owner_graph_lock, "owner graph lock");
	graph_init(&lf_owner_graph);
}
SYSINIT(lf_init, SI_SUB_LOCK, SI_ORDER_FIRST, lf_init, NULL);

/*
 * Generate a hash value for a lock owner.
 */
static int
lf_hash_owner(caddr_t id, struct flock *fl, int flags)
{
	uint32_t h;

	if (flags & F_REMOTE) {
		h = HASHSTEP(0, fl->l_pid);
		h = HASHSTEP(h, fl->l_sysid);
	} else if (flags & F_FLOCK) {
		h = ((uintptr_t) id) >> 7;
	} else {
		struct proc *p = (struct proc *) id;
		h = HASHSTEP(0, p->p_pid);
		h = HASHSTEP(h, 0);
	}

	return (h % LOCK_OWNER_HASH_SIZE);
}

/*
 * Return true if a lock owner matches the details passed to
 * lf_advlock.
 */
static int
lf_owner_matches(struct lock_owner *lo, caddr_t id, struct flock *fl,
    int flags)
{
	if (flags & F_REMOTE) {
		return lo->lo_pid == fl->l_pid
			&& lo->lo_sysid == fl->l_sysid;
	} else {
		return lo->lo_id == id;
	}
}

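/*
 * Allocate a new lock entry.  If a lock owner is supplied, take an
 * extra reference on it to account for the new entry.
 */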
static struct lockf_entry *
lf_alloc_lock(struct lock_owner *lo)
{
	struct lockf_entry *lf;

	lf = malloc(sizeof(struct lockf_entry), M_LOCKF, M_WAITOK|M_ZERO);

#ifdef LOCKF_DEBUG
	if (lockf_debug & 4)
		printf("Allocated lock %p\n", lf);
#endif
	if (lo) {
		sx_xlock(&lf_lock_owners_lock);
		lo->lo_refs++;
		sx_xunlock(&lf_lock_owners_lock);
		lf->lf_owner = lo;
	}

	return (lf);
}

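/*
 * Drop a reference on a lock entry.  When the last reference is
 * released, free the entry, drop the associated lock owner reference
 * (freeing the owner and its deadlock-graph vertex if this was its
 * last lock) and release any vnode reference held for remote locks.
 * Returns non-zero if the entry itself was freed.
 */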
static int
lf_free_lock(struct lockf_entry *lock)
{

	KASSERT(lock->lf_refs > 0, ("lockf_entry negative ref count %p", lock));
	if (--lock->lf_refs > 0)
		return (0);
	/*
	 * Adjust the lock_owner reference count and
	 * reclaim the entry if this is the last lock
	 * for that owner.
	 */
	struct lock_owner *lo = lock->lf_owner;
	if (lo) {
		KASSERT(LIST_EMPTY(&lock->lf_outedges),
		    ("freeing lock with dependencies"));
		KASSERT(LIST_EMPTY(&lock->lf_inedges),
		    ("freeing lock with dependants"));
		sx_xlock(&lf_lock_owners_lock);
		KASSERT(lo->lo_refs > 0, ("lock owner refcount"));
		lo->lo_refs--;
		if (lo->lo_refs == 0) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 1)
				printf("lf_free_lock: freeing lock owner %p\n",
				    lo);
#endif
			if (lo->lo_vertex) {
				sx_xlock(&lf_owner_graph_lock);
				graph_free_vertex(&lf_owner_graph,
				    lo->lo_vertex);
				sx_xunlock(&lf_owner_graph_lock);
			}
			LIST_REMOVE(lo, lo_link);
			free(lo, M_LOCKF);
#ifdef LOCKF_DEBUG
			if (lockf_debug & 4)
				printf("Freed lock owner %p\n", lo);
#endif
		}
		sx_unlock(&lf_lock_owners_lock);
	}
	if ((lock->lf_flags & F_REMOTE) && lock->lf_vnode) {
		vrele(lock->lf_vnode);
		lock->lf_vnode = NULL;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 4)
		printf("Freed lock %p\n", lock);
#endif
	free(lock, M_LOCKF);
	return (1);
}

/*
 * Advisory record locking support
 */
int
lf_advlockasync(struct vop_advlockasync_args *ap, struct lockf **statep,
    u_quad_t size)
{
	struct lockf *state, *freestate = NULL;
	struct flock *fl = ap->a_fl;
	struct lockf_entry *lock;
	struct vnode *vp = ap->a_vp;
	caddr_t id = ap->a_id;
	int flags = ap->a_flags;
	int hash;
	struct lock_owner *lo;
	off_t start, end, oadd;
	int error;

	/*
	 * Handle the F_UNLCKSYS case first - no need to mess about
	 * creating a lock owner for this one.
	 */
	if (ap->a_op == F_UNLCKSYS) {
		lf_clearremotesys(fl->l_sysid);
		return (0);
	}

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {

	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		if (size > OFF_MAX ||
		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
			return (EOVERFLOW);
		start = size + fl->l_start;
		break;

	default:
		return (EINVAL);
	}
	if (start < 0)
		return (EINVAL);
	if (fl->l_len < 0) {
		if (start == 0)
			return (EINVAL);
		end = start - 1;
		start += fl->l_len;
		if (start < 0)
			return (EINVAL);
	} else if (fl->l_len == 0) {
		end = OFF_MAX;
	} else {
		oadd = fl->l_len - 1;
		if (oadd > OFF_MAX - start)
			return (EOVERFLOW);
		end = start + oadd;
	}
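
	/*
	 * For example, with l_whence == SEEK_SET, l_start == 100 and
	 * l_len == -10 the byte range locked is [90 .. 99]; with
	 * l_len == 0 it is [100 .. OFF_MAX].
	 */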

retry_setlock:

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	VI_LOCK(vp);
	if ((*statep) == NULL) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			VI_UNLOCK(vp);
			return (0);
		}
	}
	VI_UNLOCK(vp);

	/*
	 * Map our arguments to an existing lock owner or create one
	 * if this is the first time we have seen this owner.
	 */
	hash = lf_hash_owner(id, fl, flags);
	sx_xlock(&lf_lock_owners_lock);
	LIST_FOREACH(lo, &lf_lock_owners[hash], lo_link)
		if (lf_owner_matches(lo, id, fl, flags))
			break;
	if (!lo) {
		/*
		 * We initialise the lock owner with a reference
		 * count which matches the new lockf_entry
		 * structure created below.
		 */
		lo = malloc(sizeof(struct lock_owner), M_LOCKF,
		    M_WAITOK|M_ZERO);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 4)
			printf("Allocated lock owner %p\n", lo);
#endif

		lo->lo_refs = 1;
		lo->lo_flags = flags;
		lo->lo_id = id;
		if (flags & F_REMOTE) {
			lo->lo_pid = fl->l_pid;
			lo->lo_sysid = fl->l_sysid;
		} else if (flags & F_FLOCK) {
			lo->lo_pid = -1;
			lo->lo_sysid = 0;
		} else {
			struct proc *p = (struct proc *) id;
			lo->lo_pid = p->p_pid;
			lo->lo_sysid = 0;
		}
		lo->lo_vertex = NULL;

#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			printf("lf_advlockasync: new lock owner %p ", lo);
			lf_print_owner(lo);
			printf("\n");
		}
#endif

		LIST_INSERT_HEAD(&lf_lock_owners[hash], lo, lo_link);
	} else {
		/*
		 * We have seen this lock owner before, increase its
		 * reference count to account for the new lockf_entry
		 * structure we create below.
		 */
		lo->lo_refs++;
	}
	sx_xunlock(&lf_lock_owners_lock);

	/*
	 * Create the lockf structure. We initialise the lf_owner
	 * field here instead of in lf_alloc_lock() to avoid paying
	 * the lf_lock_owners_lock tax twice.
	 */
	lock = lf_alloc_lock(NULL);
	lock->lf_refs = 1;
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_owner = lo;
	lock->lf_vnode = vp;
	if (flags & F_REMOTE) {
		/*
		 * For remote locks, the caller may release its ref to
		 * the vnode at any time - we have to ref it here to
		 * prevent it from being recycled unexpectedly.
		 */
		vref(vp);
	}

	/*
	 * XXX The problem is that VTOI is ufs specific, so it will
	 * break LOCKF_DEBUG for all filesystems other than UFS because
	 * it casts the vnode->data ptr to struct inode *.
	 */
/*	lock->lf_inode = VTOI(ap->a_vp); */
	lock->lf_inode = (struct inode *)0;
	lock->lf_type = fl->l_type;
	LIST_INIT(&lock->lf_outedges);
	LIST_INIT(&lock->lf_inedges);
	lock->lf_async_task = ap->a_task;
	lock->lf_flags = ap->a_flags;

	/*
	 * Do the requested operation. First find our state structure
	 * and create a new one if necessary - the caller's *statep
	 * variable and the state's ls_threads count is protected by
	 * the vnode interlock.
	 */
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		VI_UNLOCK(vp);
		lf_free_lock(lock);
		return (ENOENT);
	}

	/*
	 * Allocate a state structure if necessary.
	 */
	state = *statep;
	if (state == NULL) {
		struct lockf *ls;

		VI_UNLOCK(vp);

		ls = malloc(sizeof(struct lockf), M_LOCKF, M_WAITOK|M_ZERO);
		sx_init(&ls->ls_lock, "ls_lock");
		LIST_INIT(&ls->ls_active);
		LIST_INIT(&ls->ls_pending);
		ls->ls_threads = 1;

		sx_xlock(&lf_lock_states_lock);
		LIST_INSERT_HEAD(&lf_lock_states, ls, ls_link);
		sx_xunlock(&lf_lock_states_lock);

		/*
		 * Cope if we lost a race with some other thread while
		 * trying to allocate memory.
		 */
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {
			VI_UNLOCK(vp);
			sx_xlock(&lf_lock_states_lock);
			LIST_REMOVE(ls, ls_link);
			sx_xunlock(&lf_lock_states_lock);
			sx_destroy(&ls->ls_lock);
			free(ls, M_LOCKF);
			lf_free_lock(lock);
			return (ENOENT);
		}
		if ((*statep) == NULL) {
			state = *statep = ls;
			VI_UNLOCK(vp);
		} else {
			state = *statep;
			state->ls_threads++;
			VI_UNLOCK(vp);

			sx_xlock(&lf_lock_states_lock);
			LIST_REMOVE(ls, ls_link);
			sx_xunlock(&lf_lock_states_lock);
			sx_destroy(&ls->ls_lock);
			free(ls, M_LOCKF);
		}
	} else {
		state->ls_threads++;
		VI_UNLOCK(vp);
	}

	sx_xlock(&state->ls_lock);
	/*
	 * Recheck the doomed vnode after state->ls_lock is
	 * locked. lf_purgelocks() requires that no new threads add
	 * pending locks when vnode is marked by VI_DOOMED flag.
	 */
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		state->ls_threads--;
		wakeup(state);
		VI_UNLOCK(vp);
		sx_xunlock(&state->ls_lock);
		lf_free_lock(lock);
		return (ENOENT);
	}
	VI_UNLOCK(vp);

	switch (ap->a_op) {
	case F_SETLK:
		error = lf_setlock(state, lock, vp, ap->a_cookiep);
		break;

	case F_UNLCK:
		error = lf_clearlock(state, lock);
		lf_free_lock(lock);
		break;

	case F_GETLK:
		error = lf_getlock(state, lock, fl);
		lf_free_lock(lock);
		break;

	case F_CANCEL:
		if (ap->a_cookiep)
			error = lf_cancel(state, lock, *ap->a_cookiep);
		else
			error = EINVAL;
		lf_free_lock(lock);
		break;

	default:
		lf_free_lock(lock);
		error = EINVAL;
		break;
	}

#ifdef DIAGNOSTIC
	/*
	 * Check for some can't happen stuff. In this case, the active
	 * lock list becoming disordered or containing mutually
	 * blocking locks. We also check the pending list for locks
	 * which should be active (i.e. have no out-going edges).
	 */
	LIST_FOREACH(lock, &state->ls_active, lf_link) {
		struct lockf_entry *lf;
		if (LIST_NEXT(lock, lf_link))
			KASSERT((lock->lf_start
				<= LIST_NEXT(lock, lf_link)->lf_start),
			    ("locks disordered"));
		LIST_FOREACH(lf, &state->ls_active, lf_link) {
			if (lock == lf)
				break;
			KASSERT(!lf_blocks(lock, lf),
			    ("two conflicting active locks"));
			if (lock->lf_owner == lf->lf_owner)
				KASSERT(!lf_overlaps(lock, lf),
				    ("two overlapping locks from same owner"));
		}
	}
	LIST_FOREACH(lock, &state->ls_pending, lf_link) {
		KASSERT(!LIST_EMPTY(&lock->lf_outedges),
		    ("pending lock which should be active"));
	}
#endif
	sx_xunlock(&state->ls_lock);

	/*
	 * If we have removed the last active lock on the vnode and
	 * this is the last thread that was in-progress, we can free
	 * the state structure. We update the caller's pointer inside
	 * the vnode interlock but call free outside.
	 *
	 * XXX alternatively, keep the state structure around until
	 * the filesystem recycles - requires a callback from the
	 * filesystem.
	 */
	VI_LOCK(vp);

	state->ls_threads--;
	wakeup(state);
	if (LIST_EMPTY(&state->ls_active) && state->ls_threads == 0) {
		KASSERT(LIST_EMPTY(&state->ls_pending),
		    ("freeing state with pending locks"));
		freestate = state;
		*statep = NULL;
	}

	VI_UNLOCK(vp);

	if (freestate != NULL) {
		sx_xlock(&lf_lock_states_lock);
		LIST_REMOVE(freestate, ls_link);
		sx_xunlock(&lf_lock_states_lock);
		sx_destroy(&freestate->ls_lock);
		free(freestate, M_LOCKF);
		freestate = NULL;
	}

	if (error == EDOOFUS) {
		KASSERT(ap->a_op == F_SETLK, ("EDOOFUS"));
		goto retry_setlock;
	}
	return (error);
}

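/*
 * Synchronous entry point: wrap the request in a vop_advlockasync_args
 * structure with no async task or cancel cookie and hand it to
 * lf_advlockasync().
 */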
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **statep, u_quad_t size)
{
	struct vop_advlockasync_args a;

	a.a_vp = ap->a_vp;
	a.a_id = ap->a_id;
	a.a_op = ap->a_op;
	a.a_fl = ap->a_fl;
	a.a_flags = ap->a_flags;
	a.a_task = NULL;
	a.a_cookiep = NULL;

	return (lf_advlockasync(&a, statep, size));
}

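/*
 * Purge all locks for a vnode that is being torn down: wake up or
 * notify the owners of any pending locks, wait for the remaining
 * threads to leave and then free the vnode's lock state.
 */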
void
lf_purgelocks(struct vnode *vp, struct lockf **statep)
{
	struct lockf *state;
	struct lockf_entry *lock, *nlock;

	/*
	 * For this to work correctly, the caller must ensure that no
	 * other threads enter the locking system for this vnode,
	 * e.g. by checking VI_DOOMED. We wake up any threads that are
	 * sleeping waiting for locks on this vnode and then free all
	 * the remaining locks.
	 */
	VI_LOCK(vp);
	KASSERT(vp->v_iflag & VI_DOOMED,
	    ("lf_purgelocks: vp %p has not vgone yet", vp));
	state = *statep;
	if (state) {
		*statep = NULL;
		state->ls_threads++;
		VI_UNLOCK(vp);

		sx_xlock(&state->ls_lock);
		sx_xlock(&lf_owner_graph_lock);
		LIST_FOREACH_SAFE(lock, &state->ls_pending, lf_link, nlock) {
			LIST_REMOVE(lock, lf_link);
			lf_remove_outgoing(lock);
			lf_remove_incoming(lock);

			/*
			 * If it's an async lock, we can just free it
			 * here, otherwise we let the sleeping thread
			 * free it.
			 */
			if (lock->lf_async_task) {
				lf_free_lock(lock);
			} else {
				lock->lf_flags |= F_INTR;
				wakeup(lock);
			}
		}
		sx_xunlock(&lf_owner_graph_lock);
		sx_xunlock(&state->ls_lock);

		/*
		 * Wait for all other threads, sleeping and otherwise,
		 * to leave.
		 */
		VI_LOCK(vp);
		while (state->ls_threads > 1)
			msleep(state, VI_MTX(vp), 0, "purgelocks", 0);
		VI_UNLOCK(vp);

		/*
		 * We can just free all the active locks since they
		 * will have no dependencies (we removed them all
		 * above). We don't need to bother locking since we
		 * are the last thread using this state structure.
		 */
		KASSERT(LIST_EMPTY(&state->ls_pending),
		    ("lock pending for %p", state));
		LIST_FOREACH_SAFE(lock, &state->ls_active, lf_link, nlock) {
			LIST_REMOVE(lock, lf_link);
			lf_free_lock(lock);
		}
		sx_xlock(&lf_lock_states_lock);
		LIST_REMOVE(state, ls_link);
		sx_xunlock(&lf_lock_states_lock);
		sx_destroy(&state->ls_lock);
		free(state, M_LOCKF);
	} else {
		VI_UNLOCK(vp);
	}
}

/*
 * Return non-zero if locks 'x' and 'y' overlap.
 */
static int
lf_overlaps(struct lockf_entry *x, struct lockf_entry *y)
{

	return (x->lf_start <= y->lf_end && x->lf_end >= y->lf_start);
}
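
/*
 * The ranges are inclusive at both ends, so, for example, [0 .. 9]
 * and [9 .. 20] overlap while [0 .. 9] and [10 .. 20] do not.
 */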

/*
 * Return non-zero if lock 'x' is blocked by lock 'y' (or vice versa).
 */
static int
lf_blocks(struct lockf_entry *x, struct lockf_entry *y)
{

	return x->lf_owner != y->lf_owner
		&& (x->lf_type == F_WRLCK || y->lf_type == F_WRLCK)
		&& lf_overlaps(x, y);
}
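
/*
 * In other words, two overlapping shared (F_RDLCK) locks never block
 * each other, and a lock never blocks another lock from the same
 * owner; only an overlapping lock from a different owner, where at
 * least one of the two is exclusive (F_WRLCK), is a conflict.
 */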

/*
 * Allocate a lock edge from the free list
 */
static struct lockf_edge *
lf_alloc_edge(void)
{

	return (malloc(sizeof(struct lockf_edge), M_LOCKF, M_WAITOK|M_ZERO));
}

/*
 * Free a lock edge.
 */
static void
lf_free_edge(struct lockf_edge *e)
{

	free(e, M_LOCKF);
}


/*
 * Ensure that the lock's owner has a corresponding vertex in the
 * owner graph.
 */
static void
lf_alloc_vertex(struct lockf_entry *lock)
{
	struct owner_graph *g = &lf_owner_graph;

	if (!lock->lf_owner->lo_vertex)
		lock->lf_owner->lo_vertex =
			graph_alloc_vertex(g, lock->lf_owner);
}

/*
 * Attempt to record an edge from lock x to lock y. Return EDEADLK if
 * the new edge would cause a cycle in the owner graph.
 */
static int
lf_add_edge(struct lockf_entry *x, struct lockf_entry *y)
{
	struct owner_graph *g = &lf_owner_graph;
	struct lockf_edge *e;
	int error;

#ifdef DIAGNOSTIC
	LIST_FOREACH(e, &x->lf_outedges, le_outlink)
		KASSERT(e->le_to != y, ("adding lock edge twice"));
#endif

	/*
	 * Make sure the two owners have entries in the owner graph.
	 */
	lf_alloc_vertex(x);
	lf_alloc_vertex(y);

	error = graph_add_edge(g, x->lf_owner->lo_vertex,
	    y->lf_owner->lo_vertex);
	if (error)
		return (error);

	e = lf_alloc_edge();
	LIST_INSERT_HEAD(&x->lf_outedges, e, le_outlink);
	LIST_INSERT_HEAD(&y->lf_inedges, e, le_inlink);
	e->le_from = x;
	e->le_to = y;

	return (0);
}

/*
 * Remove an edge from the lock graph.
 */
static void
lf_remove_edge(struct lockf_edge *e)
{
	struct owner_graph *g = &lf_owner_graph;
	struct lockf_entry *x = e->le_from;
	struct lockf_entry *y = e->le_to;

	graph_remove_edge(g, x->lf_owner->lo_vertex, y->lf_owner->lo_vertex);
	LIST_REMOVE(e, le_outlink);
	LIST_REMOVE(e, le_inlink);
	e->le_from = NULL;
	e->le_to = NULL;
	lf_free_edge(e);
}

/*
 * Remove all out-going edges from lock x.
 */
static void
lf_remove_outgoing(struct lockf_entry *x)
{
	struct lockf_edge *e;

	while ((e = LIST_FIRST(&x->lf_outedges)) != NULL) {
		lf_remove_edge(e);
	}
}

/*
 * Remove all in-coming edges from lock x.
 */
static void
lf_remove_incoming(struct lockf_entry *x)
{
	struct lockf_edge *e;

	while ((e = LIST_FIRST(&x->lf_inedges)) != NULL) {
		lf_remove_edge(e);
	}
}

/*
 * Walk the list of locks for the file and create an out-going edge
 * from lock to each blocking lock.
 */
static int
lf_add_outgoing(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap;
	int error;

	LIST_FOREACH(overlap, &state->ls_active, lf_link) {
		/*
		 * We may assume that the active list is sorted by
		 * lf_start.
		 */
		if (overlap->lf_start > lock->lf_end)
			break;
		if (!lf_blocks(lock, overlap))
			continue;

		/*
		 * We've found a blocking lock. Add the corresponding
		 * edge to the graphs and see if it would cause a
		 * deadlock.
		 */
		error = lf_add_edge(lock, overlap);

		/*
		 * The only error that lf_add_edge returns is EDEADLK.
		 * Remove any edges we added and return the error.
		 */
		if (error) {
			lf_remove_outgoing(lock);
			return (error);
		}
	}

	/*
	 * We also need to add edges to sleeping locks that block
	 * us. This ensures that lf_wakeup_lock cannot grant two
	 * mutually blocking locks simultaneously and also enforces a
	 * 'first come, first served' fairness model. Note that this
	 * only happens if we are blocked by at least one active lock
	 * due to the call to lf_getblock in lf_setlock below.
	 */
	LIST_FOREACH(overlap, &state->ls_pending, lf_link) {
		if (!lf_blocks(lock, overlap))
			continue;
		/*
		 * We've found a blocking lock. Add the corresponding
		 * edge to the graphs and see if it would cause a
		 * deadlock.
		 */
		error = lf_add_edge(lock, overlap);

		/*
		 * The only error that lf_add_edge returns is EDEADLK.
		 * Remove any edges we added and return the error.
		 */
		if (error) {
			lf_remove_outgoing(lock);
			return (error);
		}
	}

	return (0);
}

/*
 * Walk the list of pending locks for the file and create an in-coming
 * edge from lock to each blocking lock.
 */
static int
lf_add_incoming(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap;
	int error;

	LIST_FOREACH(overlap, &state->ls_pending, lf_link) {
		if (!lf_blocks(lock, overlap))
			continue;

		/*
		 * We've found a blocking lock. Add the corresponding
		 * edge to the graphs and see if it would cause a
		 * deadlock.
		 */
		error = lf_add_edge(overlap, lock);

		/*
		 * The only error that lf_add_edge returns is EDEADLK.
		 * Remove any edges we added and return the error.
		 */
		if (error) {
			lf_remove_incoming(lock);
			return (error);
		}
	}
	return (0);
}

/*
 * Insert lock into the active list, keeping list entries ordered by
 * increasing values of lf_start.
 */
static void
lf_insert_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *lf, *lfprev;

	if (LIST_EMPTY(&state->ls_active)) {
		LIST_INSERT_HEAD(&state->ls_active, lock, lf_link);
		return;
	}

	lfprev = NULL;
	LIST_FOREACH(lf, &state->ls_active, lf_link) {
		if (lf->lf_start > lock->lf_start) {
			LIST_INSERT_BEFORE(lf, lock, lf_link);
			return;
		}
		lfprev = lf;
	}
	LIST_INSERT_AFTER(lfprev, lock, lf_link);
}

/*
 * Wake up a sleeping lock and remove it from the pending list now
 * that all its dependencies have been resolved. The caller should
 * arrange for the lock to be added to the active list, adjusting any
 * existing locks for the same owner as needed.
 */
static void
lf_wakeup_lock(struct lockf *state, struct lockf_entry *wakelock)
{

	/*
	 * Remove from ls_pending list and wake up the caller
	 * or start the async notification, as appropriate.
	 */
	LIST_REMOVE(wakelock, lf_link);
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_wakeup_lock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
	if (wakelock->lf_async_task) {
		taskqueue_enqueue(taskqueue_thread, wakelock->lf_async_task);
	} else {
		wakeup(wakelock);
	}
}

/*
 * Re-check all dependent locks and remove edges to locks that we no
 * longer block. If 'all' is non-zero, the lock has been removed and
 * we must remove all the dependencies, otherwise it has simply been
 * reduced but remains active. Any pending locks which have been
 * unblocked are added to 'granted'.
 */
static void
lf_update_dependancies(struct lockf *state, struct lockf_entry *lock, int all,
	struct lockf_entry_list *granted)
{
	struct lockf_edge *e, *ne;
	struct lockf_entry *deplock;

	LIST_FOREACH_SAFE(e, &lock->lf_inedges, le_inlink, ne) {
		deplock = e->le_from;
		if (all || !lf_blocks(lock, deplock)) {
			sx_xlock(&lf_owner_graph_lock);
			lf_remove_edge(e);
			sx_xunlock(&lf_owner_graph_lock);
			if (LIST_EMPTY(&deplock->lf_outedges)) {
				lf_wakeup_lock(state, deplock);
				LIST_INSERT_HEAD(granted, deplock, lf_link);
			}
		}
	}
}

/*
 * Set the start of an existing active lock, updating dependencies and
 * adding any newly woken locks to 'granted'.
 */
static void
lf_set_start(struct lockf *state, struct lockf_entry *lock, off_t new_start,
	struct lockf_entry_list *granted)
{

	KASSERT(new_start >= lock->lf_start, ("can't increase lock"));
	lock->lf_start = new_start;
	LIST_REMOVE(lock, lf_link);
	lf_insert_lock(state, lock);
	lf_update_dependancies(state, lock, FALSE, granted);
}

/*
 * Set the end of an existing active lock, updating dependencies and
 * adding any newly woken locks to 'granted'.
 */
static void
lf_set_end(struct lockf *state, struct lockf_entry *lock, off_t new_end,
	struct lockf_entry_list *granted)
{

	KASSERT(new_end <= lock->lf_end, ("can't increase lock"));
	lock->lf_end = new_end;
	lf_update_dependancies(state, lock, FALSE, granted);
}

/*
 * Add a lock to the active list, updating or removing any current
 * locks owned by the same owner and processing any pending locks that
 * become unblocked as a result. This code is also used for unlock
 * since the logic for updating existing locks is identical.
 *
 * As a result of processing the new lock, we may unblock existing
 * pending locks as a result of downgrading/unlocking. We simply
 * activate the newly granted locks by looping.
 *
 * Since the new lock already has its dependencies set up, we always
 * add it to the list (unless it's an unlock request). This may
 * fragment the lock list in some pathological cases but it's probably
 * not a real problem.
 */
static void
lf_activate_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry *overlap, *lf;
	struct lockf_entry_list granted;
	int ovcase;

	LIST_INIT(&granted);
	LIST_INSERT_HEAD(&granted, lock, lf_link);

	while (!LIST_EMPTY(&granted)) {
		lock = LIST_FIRST(&granted);
		LIST_REMOVE(lock, lf_link);

		/*
		 * Skip over locks owned by other processes.  Handle
		 * any locks that overlap and are owned by ourselves.
		 */
		overlap = LIST_FIRST(&state->ls_active);
		for (;;) {
			ovcase = lf_findoverlap(&overlap, lock, SELF);

#ifdef LOCKF_DEBUG
			if (ovcase && (lockf_debug & 2)) {
				printf("lf_setlock: overlap %d", ovcase);
				lf_print("", overlap);
			}
#endif
			/*
			 * Six cases:
			 *	0) no overlap
			 *	1) overlap == lock
			 *	2) overlap contains lock
			 *	3) lock contains overlap
			 *	4) overlap starts before lock
			 *	5) overlap ends after lock
			 */
			switch (ovcase) {
			case 0: /* no overlap */
				break;

			case 1: /* overlap == lock */
				/*
				 * We have already set up the
				 * dependants for the new lock, taking
				 * into account a possible downgrade
				 * or unlock. Remove the old lock.
				 */
				LIST_REMOVE(overlap, lf_link);
				lf_update_dependancies(state, overlap, TRUE,
					&granted);
				lf_free_lock(overlap);
				break;

			case 2: /* overlap contains lock */
				/*
				 * Just split the existing lock.
				 */
				lf_split(state, overlap, lock, &granted);
				break;

			case 3: /* lock contains overlap */
				/*
				 * Delete the overlap and advance to
				 * the next entry in the list.
				 */
				lf = LIST_NEXT(overlap, lf_link);
				LIST_REMOVE(overlap, lf_link);
				lf_update_dependancies(state, overlap, TRUE,
					&granted);
				lf_free_lock(overlap);
				overlap = lf;
				continue;

			case 4: /* overlap starts before lock */
				/*
				 * Just update the overlap end and
				 * move on.
				 */
				lf_set_end(state, overlap, lock->lf_start - 1,
				    &granted);
				overlap = LIST_NEXT(overlap, lf_link);
				continue;

			case 5: /* overlap ends after lock */
				/*
				 * Change the start of overlap and
				 * re-insert.
				 */
				lf_set_start(state, overlap, lock->lf_end + 1,
				    &granted);
				break;
			}
			break;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			if (lock->lf_type != F_UNLCK)
				lf_print("lf_activate_lock: activated", lock);
			else
				lf_print("lf_activate_lock: unlocked", lock);
			lf_printlist("lf_activate_lock", lock);
		}
#endif /* LOCKF_DEBUG */
		if (lock->lf_type != F_UNLCK)
			lf_insert_lock(state, lock);
	}
}

/*
 * Cancel a pending lock request, either as a result of a signal or a
 * cancel request for an async lock.
 */
static void
lf_cancel_lock(struct lockf *state, struct lockf_entry *lock)
{
	struct lockf_entry_list granted;

	/*
	 * Note it is theoretically possible that cancelling this lock
	 * may allow some other pending lock to become
	 * active. Consider this case:
	 *
	 * Owner	Action		Result		Dependencies
	 *
	 * A:		lock [0..0]	succeeds
	 * B:		lock [2..2]	succeeds
	 * C:		lock [1..2]	blocked		C->B
	 * D:		lock [0..1]	blocked		C->B,D->A,D->C
	 * A:		unlock [0..0]			C->B,D->C
	 * C:		cancel [1..2]
	 */

	LIST_REMOVE(lock, lf_link);

	/*
	 * Removing out-going edges is simple.
	 */
	sx_xlock(&lf_owner_graph_lock);
	lf_remove_outgoing(lock);
	sx_xunlock(&lf_owner_graph_lock);

	/*
	 * Removing in-coming edges may allow some other lock to
	 * become active - we use lf_update_dependancies to figure
	 * this out.
	 */
	LIST_INIT(&granted);
	lf_update_dependancies(state, lock, TRUE, &granted);
	lf_free_lock(lock);

	/*
	 * Feed any newly active locks to lf_activate_lock.
	 */
	while (!LIST_EMPTY(&granted)) {
		lock = LIST_FIRST(&granted);
		LIST_REMOVE(lock, lf_link);
		lf_activate_lock(state, lock);
	}
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *state, struct lockf_entry *lock, struct vnode *vp,
    void **cookiep)
{
	static char lockstr[] = "lockf";
	int error, priority, stops_deferred;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	if (!(lock->lf_flags & F_NOINTR))
		priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	if (lf_getblock(state, lock)) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0
		    && lock->lf_async_task == NULL) {
			lf_free_lock(lock);
			error = EAGAIN;
			goto out;
		}

		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			lf_activate_lock(state, lock);
			lock->lf_type = F_WRLCK;
		}

		/*
		 * We are blocked. Create edges to each blocking lock,
		 * checking for deadlock using the owner graph. For
		 * simplicity, we run deadlock detection for all
		 * locks, posix and otherwise.
		 */
		sx_xlock(&lf_owner_graph_lock);
		error = lf_add_outgoing(state, lock);
		sx_xunlock(&lf_owner_graph_lock);

		if (error) {
#ifdef LOCKF_DEBUG
			if (lockf_debug & 1)
				lf_print("lf_setlock: deadlock", lock);
#endif
			lf_free_lock(lock);
			goto out;
		}

		/*
		 * We have added edges to everything that blocks
		 * us. Sleep until they all go away.
		 */
		LIST_INSERT_HEAD(&state->ls_pending, lock, lf_link);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			struct lockf_edge *e;
			LIST_FOREACH(e, &lock->lf_outedges, le_outlink) {
				lf_print("lf_setlock: blocking on", e->le_to);
				lf_printlist("lf_setlock", e->le_to);
			}
		}
#endif /* LOCKF_DEBUG */

		if ((lock->lf_flags & F_WAIT) == 0) {
			/*
			 * The caller requested async notification -
			 * this callback happens when the blocking
			 * lock is released, allowing the caller to
			 * make another attempt to take the lock.
			 */
			*cookiep = (void *) lock;
			error = EINPROGRESS;
			goto out;
		}

		lock->lf_refs++;
		stops_deferred = sigdeferstop(SIGDEFERSTOP_ERESTART);
		error = sx_sleep(lock, &state->ls_lock, priority, lockstr, 0);
		sigallowstop(stops_deferred);
		if (lf_free_lock(lock)) {
			error = EDOOFUS;
			goto out;
		}

		/*
		 * We may have been awakened by a signal and/or by a
		 * debugger continuing us (in which cases we must
		 * remove our lock graph edges) and/or by another
		 * process releasing a lock (in which case our edges
		 * have already been removed and we have been moved to
		 * the active list). We may also have been woken by
		 * lf_purgelocks which we report to the caller as
		 * EINTR. In that case, lf_purgelocks will have
		 * removed our lock graph edges.
		 *
		 * Note that it is possible to receive a signal after
		 * we were successfully woken (and moved to the active
		 * list) but before we resumed execution. In this
		 * case, our lf_outedges list will be clear. We
		 * pretend there was no error.
1495177633Sdfr		 *
1496177633Sdfr		 * Note also, if we have been sleeping long enough, we
1497177633Sdfr		 * may now have incoming edges from some newer lock
1498177633Sdfr		 * which is waiting behind us in the queue.
149948556Sbde		 */
1500178243Skib		if (lock->lf_flags & F_INTR) {
1501178243Skib			error = EINTR;
1502178243Skib			lf_free_lock(lock);
1503178243Skib			goto out;
1504178243Skib		}
1505177633Sdfr		if (LIST_EMPTY(&lock->lf_outedges)) {
1506177633Sdfr			error = 0;
1507177633Sdfr		} else {
1508177633Sdfr			lf_cancel_lock(state, lock);
1509177633Sdfr			goto out;
15101960Sdg		}
1511177633Sdfr#ifdef LOCKF_DEBUG
1512177633Sdfr		if (lockf_debug & 1) {
1513177633Sdfr			lf_print("lf_setlock: granted", lock);
151448556Sbde		}
1515177633Sdfr#endif
1516177633Sdfr		goto out;
15171960Sdg	}
15181960Sdg	/*
1519177633Sdfr	 * It looks like we are going to grant the lock. First add
1520177633Sdfr	 * edges from any currently pending lock that the new lock
1521177633Sdfr	 * would block.
1522177633Sdfr	 */
1523177633Sdfr	sx_xlock(&lf_owner_graph_lock);
1524177633Sdfr	error = lf_add_incoming(state, lock);
1525177633Sdfr	sx_xunlock(&lf_owner_graph_lock);
1526177633Sdfr	if (error) {
1527177633Sdfr#ifdef LOCKF_DEBUG
1528177633Sdfr		if (lockf_debug & 1)
1529177633Sdfr			lf_print("lf_setlock: deadlock", lock);
1530177633Sdfr#endif
1531177633Sdfr		lf_free_lock(lock);
1532177633Sdfr		goto out;
1533177633Sdfr	}
1534177633Sdfr
1535177633Sdfr	/*
15361960Sdg	 * No blocks!!  Add the lock.  Note that we will
15371960Sdg	 * downgrade or upgrade any overlapping locks this
15381960Sdg	 * process already owns.
15391960Sdg	 */
1540177633Sdfr	lf_activate_lock(state, lock);
1541177633Sdfr	error = 0;
1542177633Sdfrout:
1543177633Sdfr	return (error);
15441960Sdg}
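
/*
 * Illustrative sketch (not part of the kernel build): the usual way into
 * lf_setlock() above is a blocking byte-range request from userland such
 * as fcntl(F_SETLKW), which reaches this file through VOP_ADVLOCK().  The
 * file name and ranges below are made-up demonstration values; extract the
 * block to a standalone program to run it.
 */
#if 0
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd;

	fd = open("/tmp/lockf-demo", O_RDWR | O_CREAT, 0644);
	if (fd == -1)
		err(1, "open");

	/* Request an exclusive lock on bytes [100..199] and wait for it. */
	memset(&fl, 0, sizeof(fl));
	fl.l_start = 100;
	fl.l_len = 100;
	fl.l_whence = SEEK_SET;
	fl.l_type = F_WRLCK;

	if (fcntl(fd, F_SETLKW, &fl) == -1) {
		if (errno == EDEADLK)
			warnx("granting this lock would deadlock");
		else if (errno == EINTR)
			warnx("interrupted while waiting for the lock");
		else
			warn("F_SETLKW");
	} else
		printf("byte-range lock granted\n");

	close(fd);
	return (0);
}
#endif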
15451960Sdg
15461960Sdg/*
15471960Sdg * Remove a byte-range lock on an inode.
15481960Sdg *
15491960Sdg * Generally, find the lock (or an overlap to that lock)
15501960Sdg * and remove it (or shrink it), then wakeup anyone we can.
15511960Sdg */
155212819Sphkstatic int
1553177633Sdfrlf_clearlock(struct lockf *state, struct lockf_entry *unlock)
15541960Sdg{
1555177633Sdfr	struct lockf_entry *overlap;
15561960Sdg
1557177633Sdfr	overlap = LIST_FIRST(&state->ls_active);
1558177633Sdfr
1559177633Sdfr	if (overlap == NOLOCKF)
15601960Sdg		return (0);
15611960Sdg#ifdef LOCKF_DEBUG
15621960Sdg	if (unlock->lf_type != F_UNLCK)
15631960Sdg		panic("lf_clearlock: bad type");
15641960Sdg	if (lockf_debug & 1)
15651960Sdg		lf_print("lf_clearlock", unlock);
15661960Sdg#endif /* LOCKF_DEBUG */
15671960Sdg
1568177633Sdfr	lf_activate_lock(state, unlock);
15691960Sdg
15701960Sdg	return (0);
15711960Sdg}
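
/*
 * Illustrative sketch (not part of the kernel build): an unlock request
 * over part of a held range arrives here as F_UNLCK and is resolved by
 * lf_activate_lock(), which may trim or split the existing entries (see
 * lf_split() below).  The file name and ranges are made-up values.
 */
#if 0
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd;

	fd = open("/tmp/lockf-demo", O_RDWR | O_CREAT, 0644);
	if (fd == -1)
		err(1, "open");

	/* Take an exclusive lock on bytes [0..99]. */
	memset(&fl, 0, sizeof(fl));
	fl.l_start = 0;
	fl.l_len = 100;
	fl.l_whence = SEEK_SET;
	fl.l_type = F_WRLCK;
	if (fcntl(fd, F_SETLKW, &fl) == -1)
		err(1, "F_SETLKW");

	/* Release only [40..59]; the original entry is split in two. */
	fl.l_start = 40;
	fl.l_len = 20;
	fl.l_type = F_UNLCK;
	if (fcntl(fd, F_SETLK, &fl) == -1)
		err(1, "F_UNLCK");

	printf("still holding [0..39] and [60..99]\n");
	close(fd);
	return (0);
}
#endif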
15721960Sdg
15731960Sdg/*
1574177633Sdfr * Check whether there is a blocking lock, and if so return its
1575177633Sdfr * details in '*fl'.
15761960Sdg */
157712819Sphkstatic int
1578177633Sdfrlf_getlock(struct lockf *state, struct lockf_entry *lock, struct flock *fl)
15791960Sdg{
1580177633Sdfr	struct lockf_entry *block;
15811960Sdg
15821960Sdg#ifdef LOCKF_DEBUG
15831960Sdg	if (lockf_debug & 1)
15841960Sdg		lf_print("lf_getlock", lock);
15851960Sdg#endif /* LOCKF_DEBUG */
15861960Sdg
1587177633Sdfr	if ((block = lf_getblock(state, lock))) {
15881960Sdg		fl->l_type = block->lf_type;
15891960Sdg		fl->l_whence = SEEK_SET;
15901960Sdg		fl->l_start = block->lf_start;
1591177633Sdfr		if (block->lf_end == OFF_MAX)
15921960Sdg			fl->l_len = 0;
15931960Sdg		else
15941960Sdg			fl->l_len = block->lf_end - block->lf_start + 1;
1595177633Sdfr		fl->l_pid = block->lf_owner->lo_pid;
1596177633Sdfr		fl->l_sysid = block->lf_owner->lo_sysid;
15971960Sdg	} else {
15981960Sdg		fl->l_type = F_UNLCK;
15991960Sdg	}
16001960Sdg	return (0);
16011960Sdg}
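
/*
 * Illustrative sketch (not part of the kernel build): an F_GETLK request
 * from userland lands in lf_getlock() above, and the fields filled in
 * there are exactly what the caller sees.  A returned l_len of 0 means
 * the blocking lock runs to OFF_MAX, i.e. to end of file.  The file name
 * is a made-up example value.
 */
#if 0
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd;

	fd = open("/tmp/lockf-demo", O_RDWR);
	if (fd == -1)
		err(1, "open");

	/* Would an exclusive whole-file lock be granted right now? */
	memset(&fl, 0, sizeof(fl));
	fl.l_start = 0;
	fl.l_len = 0;		/* zero length means "to end of file" */
	fl.l_whence = SEEK_SET;
	fl.l_type = F_WRLCK;

	if (fcntl(fd, F_GETLK, &fl) == -1)
		err(1, "F_GETLK");

	if (fl.l_type == F_UNLCK)
		printf("no blocking lock\n");
	else
		printf("blocked by pid %d: %s lock, start %jd, len %jd\n",
		    (int)fl.l_pid,
		    fl.l_type == F_WRLCK ? "exclusive" : "shared",
		    (intmax_t)fl.l_start, (intmax_t)fl.l_len);

	close(fd);
	return (0);
}
#endif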
16021960Sdg
16031960Sdg/*
1604177633Sdfr * Cancel an async lock request.
1605177633Sdfr */
1606177633Sdfrstatic int
1607177633Sdfrlf_cancel(struct lockf *state, struct lockf_entry *lock, void *cookie)
1608177633Sdfr{
1609177633Sdfr	struct lockf_entry *reallock;
1610177633Sdfr
1611177633Sdfr	/*
1612177633Sdfr	 * We need to match this request with an existing lock
1613177633Sdfr	 * request.
1614177633Sdfr	 */
1615177633Sdfr	LIST_FOREACH(reallock, &state->ls_pending, lf_link) {
1616177633Sdfr		if ((void *) reallock == cookie) {
1617177633Sdfr			/*
1618177633Sdfr			 * Double-check that this lock looks right
1619177633Sdfr			 * (maybe use a rolling ID for the cancel
1620177633Sdfr			 * cookie instead?)
1621177633Sdfr			 */
1622177633Sdfr			if (!(reallock->lf_vnode == lock->lf_vnode
1623177633Sdfr				&& reallock->lf_start == lock->lf_start
1624177633Sdfr				&& reallock->lf_end == lock->lf_end)) {
1625177633Sdfr				return (ENOENT);
1626177633Sdfr			}
1627177633Sdfr
1628177633Sdfr			/*
1629177633Sdfr			 * Make sure this lock was async and then just
1630177633Sdfr			 * remove it from its wait lists.
1631177633Sdfr			 */
1632177633Sdfr			if (!reallock->lf_async_task) {
1633177633Sdfr				return (ENOENT);
1634177633Sdfr			}
1635177633Sdfr
1636177633Sdfr			/*
1637177633Sdfr			 * Note that since any other thread must take
1638177633Sdfr			 * state->ls_lock before it can possibly
1639177633Sdfr			 * trigger the async callback, we are safe
1640177633Sdfr			 * from a race with lf_wakeup_lock, i.e. we
1641177633Sdfr			 * can free the lock (actually our caller does
1642177633Sdfr			 * this).
1643177633Sdfr			 */
1644177633Sdfr			lf_cancel_lock(state, reallock);
1645177633Sdfr			return (0);
1646177633Sdfr		}
1647177633Sdfr	}
1648177633Sdfr
1649177633Sdfr	/*
1650177633Sdfr	 * We didn't find a matching lock - not much we can do here.
1651177633Sdfr	 */
1652177633Sdfr	return (ENOENT);
1653177633Sdfr}
1654177633Sdfr
1655177633Sdfr/*
16561960Sdg * Walk the list of locks for an inode and
16571960Sdg * return the first blocking lock.
16581960Sdg */
1659177633Sdfrstatic struct lockf_entry *
1660177633Sdfrlf_getblock(struct lockf *state, struct lockf_entry *lock)
16611960Sdg{
1662177633Sdfr	struct lockf_entry *overlap;
16631960Sdg
1664177633Sdfr	LIST_FOREACH(overlap, &state->ls_active, lf_link) {
16651960Sdg		/*
1666177633Sdfr		 * We may assume that the active list is sorted by
1667177633Sdfr		 * lf_start.
16681960Sdg		 */
1669177633Sdfr		if (overlap->lf_start > lock->lf_end)
1670177633Sdfr			break;
1671177633Sdfr		if (!lf_blocks(lock, overlap))
1672177633Sdfr			continue;
1673177633Sdfr		return (overlap);
16741960Sdg	}
16751960Sdg	return (NOLOCKF);
16761960Sdg}
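
/*
 * The lf_blocks() test used above is defined earlier in this file.  As a
 * reminder of the semantics, the classical advisory-lock conflict rule is
 * sketched below on plain ranges: two locks conflict only if they belong
 * to different owners, their byte ranges overlap, and at least one of the
 * two is exclusive.  This is an illustrative paraphrase, not the kernel
 * helper itself.
 */
#if 0
static int
ranges_conflict(int owner1, int type1, off_t start1, off_t end1,
    int owner2, int type2, off_t start2, off_t end2)
{

	return (owner1 != owner2 &&
	    (type1 == F_WRLCK || type2 == F_WRLCK) &&
	    start1 <= end2 && end1 >= start2);
}
#endif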
16771960Sdg
16781960Sdg/*
1679177633Sdfr * Walk the list of locks for an inode to find an overlapping lock (if
1680177633Sdfr * any) and return a classification of that overlap.
16811960Sdg *
1682177633Sdfr * Arguments:
1683177633Sdfr *	*overlap	The place in the lock list to start looking
1684177633Sdfr *	lock		The lock which is being tested
1685177633Sdfr *	type		Pass 'SELF' to test only locks with the same
1686177633Sdfr *			owner as lock, or 'OTHER' to test only locks
1687177633Sdfr *			with a different owner
1688177633Sdfr *
1689177633Sdfr * Returns one of six values:
1690177633Sdfr *	0) no overlap
1691177633Sdfr *	1) overlap == lock
1692177633Sdfr *	2) overlap contains lock
1693177633Sdfr *	3) lock contains overlap
1694177633Sdfr *	4) overlap starts before lock
1695177633Sdfr *	5) overlap ends after lock
1696177633Sdfr *
1697177633Sdfr * If there is an overlapping lock, '*overlap' is set to point at the
1698177633Sdfr * overlapping lock.
1699177633Sdfr *
17001960Sdg * NOTE: this returns only the FIRST overlapping lock.  There
17011960Sdg *	 may be more than one.
17021960Sdg */
170312819Sphkstatic int
1704177633Sdfrlf_findoverlap(struct lockf_entry **overlap, struct lockf_entry *lock, int type)
17051960Sdg{
1706177633Sdfr	struct lockf_entry *lf;
17071960Sdg	off_t start, end;
1708177633Sdfr	int res;
17091960Sdg
1710177633Sdfr	if ((*overlap) == NOLOCKF) {
17111960Sdg		return (0);
1712177633Sdfr	}
17131960Sdg#ifdef LOCKF_DEBUG
17141960Sdg	if (lockf_debug & 2)
17151960Sdg		lf_print("lf_findoverlap: looking for overlap in", lock);
17161960Sdg#endif /* LOCKF_DEBUG */
17171960Sdg	start = lock->lf_start;
17181960Sdg	end = lock->lf_end;
1719177633Sdfr	res = 0;
1720177633Sdfr	while (*overlap) {
1721177633Sdfr		lf = *overlap;
1722177633Sdfr		if (lf->lf_start > end)
1723177633Sdfr			break;
1724177633Sdfr		if (((type & SELF) && lf->lf_owner != lock->lf_owner) ||
1725177633Sdfr		    ((type & OTHERS) && lf->lf_owner == lock->lf_owner)) {
1726177633Sdfr			*overlap = LIST_NEXT(lf, lf_link);
17271960Sdg			continue;
17281960Sdg		}
17291960Sdg#ifdef LOCKF_DEBUG
17301960Sdg		if (lockf_debug & 2)
17311960Sdg			lf_print("\tchecking", lf);
17321960Sdg#endif /* LOCKF_DEBUG */
17331960Sdg		/*
17341960Sdg		 * OK, check for overlap
17351960Sdg		 *
17361960Sdg		 * Six cases:
17371960Sdg		 *	0) no overlap
17381960Sdg		 *	1) overlap == lock
17391960Sdg		 *	2) overlap contains lock
17401960Sdg		 *	3) lock contains overlap
17411960Sdg		 *	4) overlap starts before lock
17421960Sdg		 *	5) overlap ends after lock
17431960Sdg		 */
1744177633Sdfr		if (start > lf->lf_end) {
17451960Sdg			/* Case 0 */
17461960Sdg#ifdef LOCKF_DEBUG
17471960Sdg			if (lockf_debug & 2)
17481960Sdg				printf("no overlap\n");
17491960Sdg#endif /* LOCKF_DEBUG */
1750177633Sdfr			*overlap = LIST_NEXT(lf, lf_link);
17511960Sdg			continue;
17521960Sdg		}
1753177633Sdfr		if (lf->lf_start == start && lf->lf_end == end) {
17541960Sdg			/* Case 1 */
17551960Sdg#ifdef LOCKF_DEBUG
17561960Sdg			if (lockf_debug & 2)
17571960Sdg				printf("overlap == lock\n");
17581960Sdg#endif /* LOCKF_DEBUG */
1759177633Sdfr			res = 1;
1760177633Sdfr			break;
17611960Sdg		}
1762177633Sdfr		if (lf->lf_start <= start && lf->lf_end >= end) {
17631960Sdg			/* Case 2 */
17641960Sdg#ifdef LOCKF_DEBUG
17651960Sdg			if (lockf_debug & 2)
17661960Sdg				printf("overlap contains lock\n");
17671960Sdg#endif /* LOCKF_DEBUG */
1768177633Sdfr			res = 2;
1769177633Sdfr			break;
17701960Sdg		}
1771177633Sdfr		if (start <= lf->lf_start && end >= lf->lf_end) {
17721960Sdg			/* Case 3 */
17731960Sdg#ifdef LOCKF_DEBUG
17741960Sdg			if (lockf_debug & 2)
17751960Sdg				printf("lock contains overlap\n");
17761960Sdg#endif /* LOCKF_DEBUG */
1777177633Sdfr			res = 3;
1778177633Sdfr			break;
17791960Sdg		}
1780177633Sdfr		if (lf->lf_start < start && lf->lf_end >= start) {
17811960Sdg			/* Case 4 */
17821960Sdg#ifdef LOCKF_DEBUG
17831960Sdg			if (lockf_debug & 2)
17841960Sdg				printf("overlap starts before lock\n");
17851960Sdg#endif /* LOCKF_DEBUG */
1786177633Sdfr			res = 4;
1787177633Sdfr			break;
17881960Sdg		}
1789177633Sdfr		if (lf->lf_start > start && lf->lf_end > end) {
17901960Sdg			/* Case 5 */
17911960Sdg#ifdef LOCKF_DEBUG
17921960Sdg			if (lockf_debug & 2)
17931960Sdg				printf("overlap ends after lock\n");
17941960Sdg#endif /* LOCKF_DEBUG */
1795177633Sdfr			res = 5;
1796177633Sdfr			break;
17971960Sdg		}
17981960Sdg		panic("lf_findoverlap: default");
17991960Sdg	}
1800177633Sdfr	return (res);
18011960Sdg}
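
/*
 * Illustrative walk-through of the six cases above (made-up ranges):
 * assume the existing entry 'lf' covers [10..20].
 *
 *	new lock [25..30]: start > lf_end                    -> case 0, no overlap
 *	new lock [10..20]: identical boundaries              -> case 1, overlap == lock
 *	new lock [12..18]: lf_start <= start, lf_end >= end  -> case 2, overlap contains lock
 *	new lock [ 5..25]: start <= lf_start, end >= lf_end  -> case 3, lock contains overlap
 *	new lock [15..25]: lf_start < start, lf_end >= start -> case 4, overlap starts before lock
 *	new lock [ 5..15]: lf_start > start, lf_end > end    -> case 5, overlap ends after lock
 */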
18021960Sdg
18031960Sdg/*
1804177633Sdfr * Split the existing 'lock1', based on the extent of the lock
1805177633Sdfr * described by 'lock2'. The existing lock should cover 'lock2'
1806177633Sdfr * entirely.
1807177633Sdfr *
1808177633Sdfr * Any pending locks which have been unblocked are added to
1809177633Sdfr * 'granted'.
18101960Sdg */
181112819Sphkstatic void
1812177633Sdfrlf_split(struct lockf *state, struct lockf_entry *lock1,
1813177633Sdfr    struct lockf_entry *lock2, struct lockf_entry_list *granted)
18141960Sdg{
1815177633Sdfr	struct lockf_entry *splitlock;
18161960Sdg
18171960Sdg#ifdef LOCKF_DEBUG
18181960Sdg	if (lockf_debug & 2) {
18191960Sdg		lf_print("lf_split", lock1);
18201960Sdg		lf_print("splitting from", lock2);
18211960Sdg	}
18221960Sdg#endif /* LOCKF_DEBUG */
18231960Sdg	/*
1824177633Sdfr	 * Check whether we can avoid splitting by just trimming one end of lock1.
18251960Sdg	 */
18261960Sdg	if (lock1->lf_start == lock2->lf_start) {
1827177633Sdfr		lf_set_start(state, lock1, lock2->lf_end + 1, granted);
18281960Sdg		return;
18291960Sdg	}
18301960Sdg	if (lock1->lf_end == lock2->lf_end) {
1831177633Sdfr		lf_set_end(state, lock1, lock2->lf_start - 1, granted);
18321960Sdg		return;
18331960Sdg	}
18341960Sdg	/*
18351960Sdg	 * Make a new lock consisting of the last part of
1836177633Sdfr	 * the encompassing lock.
18371960Sdg	 */
1838177633Sdfr	splitlock = lf_alloc_lock(lock1->lf_owner);
1839177633Sdfr	memcpy(splitlock, lock1, sizeof *splitlock);
1840192685Skib	splitlock->lf_refs = 1;
1841177633Sdfr	if (splitlock->lf_flags & F_REMOTE)
1842177633Sdfr		vref(splitlock->lf_vnode);
1843177633Sdfr
1844177633Sdfr	/*
1845177633Sdfr	 * This cannot cause a deadlock since any edges we would add
1846177633Sdfr	 * to splitlock already exist in lock1. We must be sure to add
1847298819Spfg	 * the necessary dependencies to splitlock before we reduce lock1;
1848177633Sdfr	 * otherwise we may accidentally grant a pending lock that
1849177633Sdfr	 * was blocked by the tail end of lock1.
1850177633Sdfr	 */
18511960Sdg	splitlock->lf_start = lock2->lf_end + 1;
1852177633Sdfr	LIST_INIT(&splitlock->lf_outedges);
1853177633Sdfr	LIST_INIT(&splitlock->lf_inedges);
1854177633Sdfr	sx_xlock(&lf_owner_graph_lock);
1855177633Sdfr	lf_add_incoming(state, splitlock);
1856177633Sdfr	sx_xunlock(&lf_owner_graph_lock);
1857177633Sdfr
1858177633Sdfr	lf_set_end(state, lock1, lock2->lf_start - 1, granted);
1859177633Sdfr
18601960Sdg	/*
18611960Sdg	 * OK, now link it in
18621960Sdg	 */
1863177633Sdfr	lf_insert_lock(state, splitlock);
18641960Sdg}
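
/*
 * Worked example of the split above (made-up ranges): if lock1 covers
 * [0..99] and lock2 describes [40..59], lf_set_end() trims lock1 to
 * [0..39] and splitlock is given lf_start = lock2->lf_end + 1, so it
 * covers [60..99].  The boundary cases where lock2 shares a start or an
 * end with lock1 never get this far; they are handled by the two early
 * returns at the top of the function.
 */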
18651960Sdg
1866180025Sdfrstruct lockdesc {
1867180025Sdfr	STAILQ_ENTRY(lockdesc) link;
1868177633Sdfr	struct vnode *vp;
1869177633Sdfr	struct flock fl;
1870177633Sdfr};
1871180025SdfrSTAILQ_HEAD(lockdesclist, lockdesc);
1872177633Sdfr
1873180025Sdfrint
1874180025Sdfrlf_iteratelocks_sysid(int sysid, lf_iterator *fn, void *arg)
1875177633Sdfr{
1876177633Sdfr	struct lockf *ls;
1877177633Sdfr	struct lockf_entry *lf;
1878180025Sdfr	struct lockdesc *ldesc;
1879180025Sdfr	struct lockdesclist locks;
1880180025Sdfr	int error;
1881177633Sdfr
1882177633Sdfr	/*
1883177633Sdfr	 * In order to keep the locking simple, we iterate over the
1884177633Sdfr	 * active lock lists to build a list of locks that need
1885180025Sdfr	 * releasing. We then call the iterator for each one in turn.
1886177633Sdfr	 *
1887177633Sdfr	 * We take an extra reference to the vnode for the duration to
1888177633Sdfr	 * make sure it doesn't go away before we are finished.
1889177633Sdfr	 */
1890177633Sdfr	STAILQ_INIT(&locks);
1891177633Sdfr	sx_xlock(&lf_lock_states_lock);
1892177633Sdfr	LIST_FOREACH(ls, &lf_lock_states, ls_link) {
1893177633Sdfr		sx_xlock(&ls->ls_lock);
1894177633Sdfr		LIST_FOREACH(lf, &ls->ls_active, lf_link) {
1895177633Sdfr			if (lf->lf_owner->lo_sysid != sysid)
1896177633Sdfr				continue;
1897177633Sdfr
1898180025Sdfr			ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
1899177633Sdfr			    M_WAITOK);
1900180025Sdfr			ldesc->vp = lf->lf_vnode;
1901180025Sdfr			vref(ldesc->vp);
1902180025Sdfr			ldesc->fl.l_start = lf->lf_start;
1903177633Sdfr			if (lf->lf_end == OFF_MAX)
1904180025Sdfr				ldesc->fl.l_len = 0;
1905177633Sdfr			else
1906180025Sdfr				ldesc->fl.l_len =
1907177633Sdfr					lf->lf_end - lf->lf_start + 1;
1908180025Sdfr			ldesc->fl.l_whence = SEEK_SET;
1909180025Sdfr			ldesc->fl.l_type = F_UNLCK;
1910180025Sdfr			ldesc->fl.l_pid = lf->lf_owner->lo_pid;
1911180025Sdfr			ldesc->fl.l_sysid = sysid;
1912180025Sdfr			STAILQ_INSERT_TAIL(&locks, ldesc, link);
1913177633Sdfr		}
1914177633Sdfr		sx_xunlock(&ls->ls_lock);
1915177633Sdfr	}
1916177633Sdfr	sx_xunlock(&lf_lock_states_lock);
1917177633Sdfr
1918180025Sdfr	/*
1919180025Sdfr	 * Call the iterator function for each lock in turn. If the
1920180025Sdfr	 * iterator returns an error code, just free the rest of the
1921180025Sdfr	 * lockdesc structures.
1922180025Sdfr	 */
1923180025Sdfr	error = 0;
1924180025Sdfr	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
1925177633Sdfr		STAILQ_REMOVE_HEAD(&locks, link);
1926180025Sdfr		if (!error)
1927180025Sdfr			error = fn(ldesc->vp, &ldesc->fl, arg);
1928180025Sdfr		vrele(ldesc->vp);
1929180025Sdfr		free(ldesc, M_LOCKF);
1930177633Sdfr	}
1931180025Sdfr
1932180025Sdfr	return (error);
1933177633Sdfr}
1934177633Sdfr
1935177633Sdfrint
1936180025Sdfrlf_iteratelocks_vnode(struct vnode *vp, lf_iterator *fn, void *arg)
1937180025Sdfr{
1938180025Sdfr	struct lockf *ls;
1939180025Sdfr	struct lockf_entry *lf;
1940180025Sdfr	struct lockdesc *ldesc;
1941180025Sdfr	struct lockdesclist locks;
1942180025Sdfr	int error;
1943180025Sdfr
1944180025Sdfr	/*
1945180025Sdfr	 * In order to keep the locking simple, we iterate over the
1946180025Sdfr	 * active lock lists to build a list of locks that need
1947180025Sdfr	 * releasing. We then call the iterator for each one in turn.
1948180025Sdfr	 *
1949180025Sdfr	 * We take an extra reference to the vnode for the duration to
1950180025Sdfr	 * make sure it doesn't go away before we are finished.
1951180025Sdfr	 */
1952180025Sdfr	STAILQ_INIT(&locks);
1953194993Skib	VI_LOCK(vp);
1954180025Sdfr	ls = vp->v_lockf;
1955194993Skib	if (!ls) {
1956194993Skib		VI_UNLOCK(vp);
1957180025Sdfr		return (0);
1958194993Skib	}
1959194993Skib	ls->ls_threads++;
1960194993Skib	VI_UNLOCK(vp);
1961180025Sdfr
1962180025Sdfr	sx_xlock(&ls->ls_lock);
1963180025Sdfr	LIST_FOREACH(lf, &ls->ls_active, lf_link) {
1964180025Sdfr		ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
1965180025Sdfr		    M_WAITOK);
1966180025Sdfr		ldesc->vp = lf->lf_vnode;
1967180025Sdfr		vref(ldesc->vp);
1968180025Sdfr		ldesc->fl.l_start = lf->lf_start;
1969180025Sdfr		if (lf->lf_end == OFF_MAX)
1970180025Sdfr			ldesc->fl.l_len = 0;
1971180025Sdfr		else
1972180025Sdfr			ldesc->fl.l_len =
1973180025Sdfr				lf->lf_end - lf->lf_start + 1;
1974180025Sdfr		ldesc->fl.l_whence = SEEK_SET;
1975180025Sdfr		ldesc->fl.l_type = F_UNLCK;
1976180025Sdfr		ldesc->fl.l_pid = lf->lf_owner->lo_pid;
1977180025Sdfr		ldesc->fl.l_sysid = lf->lf_owner->lo_sysid;
1978180025Sdfr		STAILQ_INSERT_TAIL(&locks, ldesc, link);
1979180025Sdfr	}
1980180025Sdfr	sx_xunlock(&ls->ls_lock);
1981194993Skib	VI_LOCK(vp);
1982194993Skib	ls->ls_threads--;
1983194993Skib	wakeup(ls);
1984194993Skib	VI_UNLOCK(vp);
1985180025Sdfr
1986180025Sdfr	/*
1987180025Sdfr	 * Call the iterator function for each lock in turn. If the
1988180025Sdfr	 * iterator returns an error code, just free the rest of the
1989180025Sdfr	 * lockdesc structures.
1990180025Sdfr	 */
1991180025Sdfr	error = 0;
1992180025Sdfr	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
1993180025Sdfr		STAILQ_REMOVE_HEAD(&locks, link);
1994180025Sdfr		if (!error)
1995180025Sdfr			error = fn(ldesc->vp, &ldesc->fl, arg);
1996180025Sdfr		vrele(ldesc->vp);
1997180025Sdfr		free(ldesc, M_LOCKF);
1998180025Sdfr	}
1999180025Sdfr
2000180025Sdfr	return (error);
2001180025Sdfr}
2002180025Sdfr
2003180025Sdfrstatic int
2004180025Sdfrlf_clearremotesys_iterator(struct vnode *vp, struct flock *fl, void *arg)
2005180025Sdfr{
2006180025Sdfr
2007180025Sdfr	VOP_ADVLOCK(vp, 0, F_UNLCK, fl, F_REMOTE);
2008180025Sdfr	return (0);
2009180025Sdfr}
2010180025Sdfr
2011180025Sdfrvoid
2012180025Sdfrlf_clearremotesys(int sysid)
2013180025Sdfr{
2014180025Sdfr
2015180025Sdfr	KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
2016180025Sdfr	lf_iteratelocks_sysid(sysid, lf_clearremotesys_iterator, NULL);
2017180025Sdfr}
2018180025Sdfr
2019180025Sdfrint
2020177633Sdfrlf_countlocks(int sysid)
2021177633Sdfr{
2022177633Sdfr	int i;
2023177633Sdfr	struct lock_owner *lo;
2024177633Sdfr	int count;
2025177633Sdfr
2026177633Sdfr	count = 0;
2027177633Sdfr	sx_xlock(&lf_lock_owners_lock);
2028177633Sdfr	for (i = 0; i < LOCK_OWNER_HASH_SIZE; i++)
2029177633Sdfr		LIST_FOREACH(lo, &lf_lock_owners[i], lo_link)
2030177633Sdfr			if (lo->lo_sysid == sysid)
2031177633Sdfr				count += lo->lo_refs;
2032177633Sdfr	sx_xunlock(&lf_lock_owners_lock);
2033177633Sdfr
2034177633Sdfr	return (count);
2035177633Sdfr}
2036177633Sdfr
2037177633Sdfr#ifdef LOCKF_DEBUG
2038177633Sdfr
20391960Sdg/*
2040177633Sdfr * Return non-zero if y is reachable from x using a brute force
2041177633Sdfr * search. If reachable and path is non-null, return the route taken
2042177633Sdfr * in path.
20431960Sdg */
2044177633Sdfrstatic int
2045177633Sdfrgraph_reaches(struct owner_vertex *x, struct owner_vertex *y,
2046177633Sdfr    struct owner_vertex_list *path)
2047177633Sdfr{
2048177633Sdfr	struct owner_edge *e;
2049177633Sdfr
2050177633Sdfr	if (x == y) {
2051177633Sdfr		if (path)
2052177633Sdfr			TAILQ_INSERT_HEAD(path, x, v_link);
2053177633Sdfr		return 1;
2054177633Sdfr	}
2055177633Sdfr
2056177633Sdfr	LIST_FOREACH(e, &x->v_outedges, e_outlink) {
2057177633Sdfr		if (graph_reaches(e->e_to, y, path)) {
2058177633Sdfr			if (path)
2059177633Sdfr				TAILQ_INSERT_HEAD(path, x, v_link);
2060177633Sdfr			return 1;
2061177633Sdfr		}
2062177633Sdfr	}
2063177633Sdfr	return 0;
2064177633Sdfr}
2065177633Sdfr
2066177633Sdfr/*
2067177633Sdfr * Perform consistency checks on the graph. Make sure the values of
2068177633Sdfr * v_order are correct. If checkorder is non-zero, check that no vertex
2069177633Sdfr * can reach any other vertex with a smaller order.
2070177633Sdfr */
207112819Sphkstatic void
2072177633Sdfrgraph_check(struct owner_graph *g, int checkorder)
20731960Sdg{
2074177633Sdfr	int i, j;
20751960Sdg
2076177633Sdfr	for (i = 0; i < g->g_size; i++) {
2077177633Sdfr		if (!g->g_vertices[i]->v_owner)
2078177633Sdfr			continue;
2079177633Sdfr		KASSERT(g->g_vertices[i]->v_order == i,
2080177633Sdfr		    ("lock graph vertices disordered"));
2081177633Sdfr		if (checkorder) {
2082177633Sdfr			for (j = 0; j < i; j++) {
2083177633Sdfr				if (!g->g_vertices[j]->v_owner)
2084177633Sdfr					continue;
2085177633Sdfr				KASSERT(!graph_reaches(g->g_vertices[i],
2086177633Sdfr					g->g_vertices[j], NULL),
2087177633Sdfr				    ("lock graph vertices disordered"));
2088177633Sdfr			}
2089177633Sdfr		}
2090177633Sdfr	}
2091177633Sdfr}
2092177633Sdfr
2093177633Sdfrstatic void
2094177633Sdfrgraph_print_vertices(struct owner_vertex_list *set)
2095177633Sdfr{
2096177633Sdfr	struct owner_vertex *v;
2097177633Sdfr
2098177633Sdfr	printf("{ ");
2099177633Sdfr	TAILQ_FOREACH(v, set, v_link) {
2100177633Sdfr		printf("%d:", v->v_order);
2101177633Sdfr		lf_print_owner(v->v_owner);
2102177633Sdfr		if (TAILQ_NEXT(v, v_link))
2103177633Sdfr			printf(", ");
2104177633Sdfr	}
2105177633Sdfr	printf(" }\n");
2106177633Sdfr}
2107177633Sdfr
2108177633Sdfr#endif
2109177633Sdfr
2110177633Sdfr/*
2111177633Sdfr * Calculate the sub-set of vertices v from the affected region [y..x]
2112177633Sdfr * where v is reachable from y. Return -1 if a loop was detected
2113177633Sdfr * (i.e. x is reachable from y); otherwise return the number of
2114177633Sdfr * vertices in this subset.
2115177633Sdfr */
2116177633Sdfrstatic int
2117177633Sdfrgraph_delta_forward(struct owner_graph *g, struct owner_vertex *x,
2118177633Sdfr    struct owner_vertex *y, struct owner_vertex_list *delta)
2119177633Sdfr{
2120177633Sdfr	uint32_t gen;
2121177633Sdfr	struct owner_vertex *v;
2122177633Sdfr	struct owner_edge *e;
2123177633Sdfr	int n;
2124177633Sdfr
2125177633Sdfr	/*
2126177633Sdfr	 * We start with a set containing just y. Then for each vertex
2127177633Sdfr	 * v in the set so far unprocessed, we add each vertex that v
2128177633Sdfr	 * has an out-edge to and that is within the affected region
2129177633Sdfr	 * [y..x]. If we see the vertex x on our travels, stop
2130177633Sdfr	 * immediately.
2131177633Sdfr	 */
2132177633Sdfr	TAILQ_INIT(delta);
2133177633Sdfr	TAILQ_INSERT_TAIL(delta, y, v_link);
2134177633Sdfr	v = y;
2135177633Sdfr	n = 1;
2136177633Sdfr	gen = g->g_gen;
2137177633Sdfr	while (v) {
2138177633Sdfr		LIST_FOREACH(e, &v->v_outedges, e_outlink) {
2139177633Sdfr			if (e->e_to == x)
2140177633Sdfr				return -1;
2141177633Sdfr			if (e->e_to->v_order < x->v_order
2142177633Sdfr			    && e->e_to->v_gen != gen) {
2143177633Sdfr				e->e_to->v_gen = gen;
2144177633Sdfr				TAILQ_INSERT_TAIL(delta, e->e_to, v_link);
2145177633Sdfr				n++;
2146177633Sdfr			}
2147177633Sdfr		}
2148177633Sdfr		v = TAILQ_NEXT(v, v_link);
2149177633Sdfr	}
2150177633Sdfr
2151177633Sdfr	return (n);
2152177633Sdfr}
2153177633Sdfr
2154177633Sdfr/*
2155177633Sdfr * Calculate the sub-set of vertices v from the affected region [y..x]
2156177633Sdfr * where v reaches x. Return the number of vertices in this subset.
2157177633Sdfr */
2158177633Sdfrstatic int
2159177633Sdfrgraph_delta_backward(struct owner_graph *g, struct owner_vertex *x,
2160177633Sdfr    struct owner_vertex *y, struct owner_vertex_list *delta)
2161177633Sdfr{
2162177633Sdfr	uint32_t gen;
2163177633Sdfr	struct owner_vertex *v;
2164177633Sdfr	struct owner_edge *e;
2165177633Sdfr	int n;
2166177633Sdfr
2167177633Sdfr	/*
2168177633Sdfr	 * We start with a set containing just x. Then for each vertex
2169177633Sdfr	 * v in the set so far unprocessed, we add each vertex that v
2170177633Sdfr	 * has an in-edge from and that is within the affected region
2171177633Sdfr	 * [y..x].
2172177633Sdfr	 */
2173177633Sdfr	TAILQ_INIT(delta);
2174177633Sdfr	TAILQ_INSERT_TAIL(delta, x, v_link);
2175177633Sdfr	v = x;
2176177633Sdfr	n = 1;
2177177633Sdfr	gen = g->g_gen;
2178177633Sdfr	while (v) {
2179177633Sdfr		LIST_FOREACH(e, &v->v_inedges, e_inlink) {
2180177633Sdfr			if (e->e_from->v_order > y->v_order
2181177633Sdfr			    && e->e_from->v_gen != gen) {
2182177633Sdfr				e->e_from->v_gen = gen;
2183177633Sdfr				TAILQ_INSERT_HEAD(delta, e->e_from, v_link);
2184177633Sdfr				n++;
2185177633Sdfr			}
2186177633Sdfr		}
2187177633Sdfr		v = TAILQ_PREV(v, owner_vertex_list, v_link);
2188177633Sdfr	}
2189177633Sdfr
2190177633Sdfr	return (n);
2191177633Sdfr}
2192177633Sdfr
2193177633Sdfrstatic int
2194177633Sdfrgraph_add_indices(int *indices, int n, struct owner_vertex_list *set)
2195177633Sdfr{
2196177633Sdfr	struct owner_vertex *v;
2197177633Sdfr	int i, j;
2198177633Sdfr
2199177633Sdfr	TAILQ_FOREACH(v, set, v_link) {
2200177633Sdfr		for (i = n;
2201177633Sdfr		     i > 0 && indices[i - 1] > v->v_order; i--)
2202177633Sdfr			;
2203177633Sdfr		for (j = n - 1; j >= i; j--)
2204177633Sdfr			indices[j + 1] = indices[j];
2205177633Sdfr		indices[i] = v->v_order;
2206177633Sdfr		n++;
2207177633Sdfr	}
2208177633Sdfr
2209177633Sdfr	return (n);
2210177633Sdfr}
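
/*
 * Illustrative example (made-up order values): starting from an empty
 * index array, merging a set whose vertices have v_order { 7, 3, 9 }
 * yields indices { 3, 7, 9 } and n = 3; merging a second set with
 * v_order { 5, 1 } then yields { 1, 3, 5, 7, 9 }.  This is how
 * graph_add_edge() builds the sorted union of all affected orders before
 * handing them back out with graph_assign_indices().
 */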
2211177633Sdfr
2212177633Sdfrstatic int
2213177633Sdfrgraph_assign_indices(struct owner_graph *g, int *indices, int nextunused,
2214177633Sdfr    struct owner_vertex_list *set)
2215177633Sdfr{
2216177633Sdfr	struct owner_vertex *v, *vlowest;
2217177633Sdfr
2218177633Sdfr	while (!TAILQ_EMPTY(set)) {
2219177633Sdfr		vlowest = NULL;
2220177633Sdfr		TAILQ_FOREACH(v, set, v_link) {
2221177633Sdfr			if (!vlowest || v->v_order < vlowest->v_order)
2222177633Sdfr				vlowest = v;
2223177633Sdfr		}
2224177633Sdfr		TAILQ_REMOVE(set, vlowest, v_link);
2225177633Sdfr		vlowest->v_order = indices[nextunused];
2226177633Sdfr		g->g_vertices[vlowest->v_order] = vlowest;
2227177633Sdfr		nextunused++;
2228177633Sdfr	}
2229177633Sdfr
2230177633Sdfr	return (nextunused);
2231177633Sdfr}
2232177633Sdfr
2233177633Sdfrstatic int
2234177633Sdfrgraph_add_edge(struct owner_graph *g, struct owner_vertex *x,
2235177633Sdfr    struct owner_vertex *y)
2236177633Sdfr{
2237177633Sdfr	struct owner_edge *e;
2238177633Sdfr	struct owner_vertex_list deltaF, deltaB;
2239177633Sdfr	int nF, nB, n, vi, i;
2240177633Sdfr	int *indices;
2241177633Sdfr
2242177633Sdfr	sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
2243177633Sdfr
2244177633Sdfr	LIST_FOREACH(e, &x->v_outedges, e_outlink) {
2245177633Sdfr		if (e->e_to == y) {
2246177633Sdfr			e->e_refs++;
2247177633Sdfr			return (0);
2248177633Sdfr		}
2249177633Sdfr	}
2250177633Sdfr
22511960Sdg#ifdef LOCKF_DEBUG
2252177633Sdfr	if (lockf_debug & 8) {
2253177633Sdfr		printf("adding edge %d:", x->v_order);
2254177633Sdfr		lf_print_owner(x->v_owner);
2255177633Sdfr		printf(" -> %d:", y->v_order);
2256177633Sdfr		lf_print_owner(y->v_owner);
2257177633Sdfr		printf("\n");
225822521Sdyson	}
2259177633Sdfr#endif
2260177633Sdfr	if (y->v_order < x->v_order) {
2261177633Sdfr		/*
2262177633Sdfr		 * The new edge violates the order. First find the set
2263177633Sdfr		 * of affected vertices reachable from y (deltaF) and
2264177633Sdfr		 * the set of affected vertices that reach x
2265177633Sdfr		 * (deltaB), using the graph generation number to
2266177633Sdfr		 * detect whether we have visited a given vertex
2267177633Sdfr		 * already. We re-order the graph so that each vertex
2268177633Sdfr		 * in deltaB appears before each vertex in deltaF.
2269177633Sdfr		 *
2270177633Sdfr		 * If x is a member of deltaF, then the new edge would
2271177633Sdfr		 * create a cycle. Otherwise, we may assume that
2272177633Sdfr		 * deltaF and deltaB are disjoint.
2273177633Sdfr		 */
2274177633Sdfr		g->g_gen++;
2275177633Sdfr		if (g->g_gen == 0) {
2276177633Sdfr			/*
2277177633Sdfr			 * Generation wrap.
2278177633Sdfr			 */
2279177633Sdfr			for (vi = 0; vi < g->g_size; vi++) {
2280177633Sdfr				g->g_vertices[vi]->v_gen = 0;
2281177633Sdfr			}
2282177633Sdfr			g->g_gen++;
2283177633Sdfr		}
2284177633Sdfr		nF = graph_delta_forward(g, x, y, &deltaF);
2285177633Sdfr		if (nF < 0) {
2286177633Sdfr#ifdef LOCKF_DEBUG
2287177633Sdfr			if (lockf_debug & 8) {
2288177633Sdfr				struct owner_vertex_list path;
2289177633Sdfr				printf("deadlock: ");
2290177633Sdfr				TAILQ_INIT(&path);
2291177633Sdfr				graph_reaches(y, x, &path);
2292177633Sdfr				graph_print_vertices(&path);
2293177633Sdfr			}
2294177633Sdfr#endif
2295177633Sdfr			return (EDEADLK);
2296177633Sdfr		}
2297177633Sdfr
2298177633Sdfr#ifdef LOCKF_DEBUG
2299177633Sdfr		if (lockf_debug & 8) {
2300177633Sdfr			printf("re-ordering graph vertices\n");
2301177633Sdfr			printf("deltaF = ");
2302177633Sdfr			graph_print_vertices(&deltaF);
2303177633Sdfr		}
2304177633Sdfr#endif
2305177633Sdfr
2306177633Sdfr		nB = graph_delta_backward(g, x, y, &deltaB);
2307177633Sdfr
2308177633Sdfr#ifdef LOCKF_DEBUG
2309177633Sdfr		if (lockf_debug & 8) {
2310177633Sdfr			printf("deltaB = ");
2311177633Sdfr			graph_print_vertices(&deltaB);
2312177633Sdfr		}
2313177633Sdfr#endif
2314177633Sdfr
2315177633Sdfr		/*
2316177633Sdfr		 * We first build a set of vertex indices (vertex
2317177633Sdfr		 * order values) that we may use, then we re-assign
2318177633Sdfr		 * orders first to those vertices in deltaB, then to
2319177633Sdfr		 * deltaF. Note that the contents of deltaF and deltaB
2320177633Sdfr		 * may be partially disordered - we perform an
2321177633Sdfr		 * insertion sort while building our index set.
2322177633Sdfr		 */
2323177633Sdfr		indices = g->g_indexbuf;
2324177633Sdfr		n = graph_add_indices(indices, 0, &deltaF);
2325177633Sdfr		graph_add_indices(indices, n, &deltaB);
2326177633Sdfr
2327177633Sdfr		/*
2328177633Sdfr		 * We must also be sure to maintain the relative
2329177633Sdfr		 * ordering of deltaF and deltaB when re-assigning
2330177633Sdfr		 * vertices. We do this by iteratively removing the
2331177633Sdfr		 * lowest ordered element from the set and assigning
2332177633Sdfr		 * it the next value from our new ordering.
2333177633Sdfr		 */
2334177633Sdfr		i = graph_assign_indices(g, indices, 0, &deltaB);
2335177633Sdfr		graph_assign_indices(g, indices, i, &deltaF);
2336177633Sdfr
2337177633Sdfr#ifdef LOCKF_DEBUG
2338177633Sdfr		if (lockf_debug & 8) {
2339177633Sdfr			struct owner_vertex_list set;
2340177633Sdfr			TAILQ_INIT(&set);
2341177633Sdfr			for (i = 0; i < nB + nF; i++)
2342177633Sdfr				TAILQ_INSERT_TAIL(&set,
2343177633Sdfr				    g->g_vertices[indices[i]], v_link);
2344177633Sdfr			printf("new ordering = ");
2345177633Sdfr			graph_print_vertices(&set);
2346177633Sdfr		}
2347177633Sdfr#endif
2348177633Sdfr	}
2349177633Sdfr
2350177633Sdfr	KASSERT(x->v_order < y->v_order, ("Failed to re-order graph"));
2351177633Sdfr
2352177633Sdfr#ifdef LOCKF_DEBUG
2353177633Sdfr	if (lockf_debug & 8) {
2354177633Sdfr		graph_check(g, TRUE);
2355177633Sdfr	}
2356177633Sdfr#endif
2357177633Sdfr
2358177633Sdfr	e = malloc(sizeof(struct owner_edge), M_LOCKF, M_WAITOK);
2359177633Sdfr
2360177633Sdfr	LIST_INSERT_HEAD(&x->v_outedges, e, e_outlink);
2361177633Sdfr	LIST_INSERT_HEAD(&y->v_inedges, e, e_inlink);
2362177633Sdfr	e->e_refs = 1;
2363177633Sdfr	e->e_from = x;
2364177633Sdfr	e->e_to = y;
2365177633Sdfr
2366177633Sdfr	return (0);
23671960Sdg}
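
/*
 * Worked example of the re-ordering above (made-up vertices): suppose the
 * current order is a(0), y(1), w(2), x(3), b(4), there is an existing
 * edge y -> w, and we add x -> y.  Since y's order (1) is less than x's
 * (3), deltaF = { y, w } (reachable from y within [y..x]) and
 * deltaB = { x } (reaches x within [y..x]).  The available order values
 * are the sorted union { 1, 2, 3 }; deltaB is renumbered first, so x
 * becomes 1, then y becomes 2 and w becomes 3.  Both x -> y and y -> w
 * now run from a lower order to a higher one, and vertices outside
 * [y..x] keep their positions.
 */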
23681960Sdg
2369177633Sdfr/*
2370177633Sdfr * Remove an edge x->y from the graph.
2371177633Sdfr */
2372177633Sdfrstatic void
2373177633Sdfrgraph_remove_edge(struct owner_graph *g, struct owner_vertex *x,
2374177633Sdfr    struct owner_vertex *y)
2375177633Sdfr{
2376177633Sdfr	struct owner_edge *e;
2377177633Sdfr
2378177633Sdfr	sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
2379177633Sdfr
2380177633Sdfr	LIST_FOREACH(e, &x->v_outedges, e_outlink) {
2381177633Sdfr		if (e->e_to == y)
2382177633Sdfr			break;
2383177633Sdfr	}
2384177633Sdfr	KASSERT(e, ("Removing non-existent edge from deadlock graph"));
2385177633Sdfr
2386177633Sdfr	e->e_refs--;
2387177633Sdfr	if (e->e_refs == 0) {
23881960Sdg#ifdef LOCKF_DEBUG
2389177633Sdfr		if (lockf_debug & 8) {
2390177633Sdfr			printf("removing edge %d:", x->v_order);
2391177633Sdfr			lf_print_owner(x->v_owner);
2392177633Sdfr			printf(" -> %d:", y->v_order);
2393177633Sdfr			lf_print_owner(y->v_owner);
2394177633Sdfr			printf("\n");
2395177633Sdfr		}
2396177633Sdfr#endif
2397177633Sdfr		LIST_REMOVE(e, e_outlink);
2398177633Sdfr		LIST_REMOVE(e, e_inlink);
2399177633Sdfr		free(e, M_LOCKF);
2400177633Sdfr	}
2401177633Sdfr}
2402177633Sdfr
24031960Sdg/*
2404177633Sdfr * Allocate a vertex from the free list. Return ENOMEM if there are
2405177633Sdfr * none.
2406177633Sdfr */
2407177633Sdfrstatic struct owner_vertex *
2408177633Sdfrgraph_alloc_vertex(struct owner_graph *g, struct lock_owner *lo)
2409177633Sdfr{
2410177633Sdfr	struct owner_vertex *v;
2411177633Sdfr
2412177633Sdfr	sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
2413177633Sdfr
2414177633Sdfr	v = malloc(sizeof(struct owner_vertex), M_LOCKF, M_WAITOK);
2415177633Sdfr	if (g->g_size == g->g_space) {
2416177633Sdfr		g->g_vertices = realloc(g->g_vertices,
2417177633Sdfr		    2 * g->g_space * sizeof(struct owner_vertex *),
2418177633Sdfr		    M_LOCKF, M_WAITOK);
2419177633Sdfr		free(g->g_indexbuf, M_LOCKF);
2420177633Sdfr		g->g_indexbuf = malloc(2 * g->g_space * sizeof(int),
2421177633Sdfr		    M_LOCKF, M_WAITOK);
2422177633Sdfr		g->g_space = 2 * g->g_space;
2423177633Sdfr	}
2424177633Sdfr	v->v_order = g->g_size;
2425177633Sdfr	v->v_gen = g->g_gen;
2426177633Sdfr	g->g_vertices[g->g_size] = v;
2427177633Sdfr	g->g_size++;
2428177633Sdfr
2429177633Sdfr	LIST_INIT(&v->v_outedges);
2430177633Sdfr	LIST_INIT(&v->v_inedges);
2431177633Sdfr	v->v_owner = lo;
2432177633Sdfr
2433177633Sdfr	return (v);
2434177633Sdfr}
2435177633Sdfr
2436177633Sdfrstatic void
2437177633Sdfrgraph_free_vertex(struct owner_graph *g, struct owner_vertex *v)
2438177633Sdfr{
2439177633Sdfr	struct owner_vertex *w;
2440177633Sdfr	int i;
2441177633Sdfr
2442177633Sdfr	sx_assert(&lf_owner_graph_lock, SX_XLOCKED);
2443177633Sdfr
2444177633Sdfr	KASSERT(LIST_EMPTY(&v->v_outedges), ("Freeing vertex with edges"));
2445177633Sdfr	KASSERT(LIST_EMPTY(&v->v_inedges), ("Freeing vertex with edges"));
2446177633Sdfr
2447177633Sdfr	/*
2448177633Sdfr	 * Remove from the graph's array and close up the gap,
2449177633Sdfr	 * renumbering the other vertices.
2450177633Sdfr	 */
2451177633Sdfr	for (i = v->v_order + 1; i < g->g_size; i++) {
2452177633Sdfr		w = g->g_vertices[i];
2453177633Sdfr		w->v_order--;
2454177633Sdfr		g->g_vertices[i - 1] = w;
2455177633Sdfr	}
2456177633Sdfr	g->g_size--;
2457177633Sdfr
2458177633Sdfr	free(v, M_LOCKF);
2459177633Sdfr}
2460177633Sdfr
2461177633Sdfrstatic struct owner_graph *
2462177633Sdfrgraph_init(struct owner_graph *g)
2463177633Sdfr{
2464177633Sdfr
2465177633Sdfr	g->g_vertices = malloc(10 * sizeof(struct owner_vertex *),
2466177633Sdfr	    M_LOCKF, M_WAITOK);
2467177633Sdfr	g->g_size = 0;
2468177633Sdfr	g->g_space = 10;
2469177633Sdfr	g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK);
2470177633Sdfr	g->g_gen = 0;
2471177633Sdfr
2472177633Sdfr	return (g);
2473177633Sdfr}
2474177633Sdfr
2475177633Sdfr#ifdef LOCKF_DEBUG
2476177633Sdfr/*
2477177633Sdfr * Print description of a lock owner
2478177633Sdfr */
2479177633Sdfrstatic void
2480177633Sdfrlf_print_owner(struct lock_owner *lo)
2481177633Sdfr{
2482177633Sdfr
2483177633Sdfr	if (lo->lo_flags & F_REMOTE) {
2484177633Sdfr		printf("remote pid %d, system %d",
2485177633Sdfr		    lo->lo_pid, lo->lo_sysid);
2486177633Sdfr	} else if (lo->lo_flags & F_FLOCK) {
2487177633Sdfr		printf("file %p", lo->lo_id);
2488177633Sdfr	} else {
2489177633Sdfr		printf("local pid %d", lo->lo_pid);
2490177633Sdfr	}
2491177633Sdfr}
2492177633Sdfr
2493177633Sdfr/*
24941960Sdg * Print out a lock.
24951960Sdg */
2496140808Sjeffstatic void
2497177633Sdfrlf_print(char *tag, struct lockf_entry *lock)
24981960Sdg{
24998876Srgrimes
250037951Sbde	printf("%s: lock %p for ", tag, (void *)lock);
2501177633Sdfr	lf_print_owner(lock->lf_owner);
250287211Salfred	if (lock->lf_inode != (struct inode *)0)
2503177633Sdfr		printf(" in ino %ju on dev <%s>,",
2504106584Smux		    (uintmax_t)lock->lf_inode->i_number,
2505306739Sjhb		    devtoname(ITODEV(lock->lf_inode)));
2506177633Sdfr	printf(" %s, start %jd, end ",
2507177633Sdfr	    lock->lf_type == F_RDLCK ? "shared" :
2508177633Sdfr	    lock->lf_type == F_WRLCK ? "exclusive" :
2509177633Sdfr	    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
2510177633Sdfr	    (intmax_t)lock->lf_start);
2511177633Sdfr	if (lock->lf_end == OFF_MAX)
2512177633Sdfr		printf("EOF");
251387211Salfred	else
2514177633Sdfr		printf("%jd", (intmax_t)lock->lf_end);
2515177633Sdfr	if (!LIST_EMPTY(&lock->lf_outedges))
2516177633Sdfr		printf(" block %p\n",
2517177633Sdfr		    (void *)LIST_FIRST(&lock->lf_outedges)->le_to);
25181960Sdg	else
25191960Sdg		printf("\n");
25201960Sdg}
25211960Sdg
2522140808Sjeffstatic void
2523177633Sdfrlf_printlist(char *tag, struct lockf_entry *lock)
25241960Sdg{
2525177633Sdfr	struct lockf_entry *lf, *blk;
2526177633Sdfr	struct lockf_edge *e;
25271960Sdg
252887211Salfred	if (lock->lf_inode == (struct inode *)0)
252987211Salfred		return;
253087211Salfred
2531144278Sphk	printf("%s: Lock list for ino %ju on dev <%s>:\n",
2532106584Smux	    tag, (uintmax_t)lock->lf_inode->i_number,
2533306739Sjhb	    devtoname(ITODEV(lock->lf_inode)));
2534178247Sdfr	LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) {
253537951Sbde		printf("\tlock %p for ",(void *)lf);
2536177633Sdfr		lf_print_owner(lock->lf_owner);
2537106584Smux		printf(", %s, start %jd, end %jd",
253837951Sbde		    lf->lf_type == F_RDLCK ? "shared" :
253937951Sbde		    lf->lf_type == F_WRLCK ? "exclusive" :
254037951Sbde		    lf->lf_type == F_UNLCK ? "unlock" :
2541106584Smux		    "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
2542177633Sdfr		LIST_FOREACH(e, &lf->lf_outedges, le_outlink) {
2543177633Sdfr			blk = e->le_to;
254437951Sbde			printf("\n\t\tlock request %p for ", (void *)blk);
2545177633Sdfr			lf_print_owner(blk->lf_owner);
2546106584Smux			printf(", %s, start %jd, end %jd",
254737951Sbde			    blk->lf_type == F_RDLCK ? "shared" :
254837951Sbde			    blk->lf_type == F_WRLCK ? "exclusive" :
254937951Sbde			    blk->lf_type == F_UNLCK ? "unlock" :
2550106584Smux			    "unknown", (intmax_t)blk->lf_start,
2551106584Smux			    (intmax_t)blk->lf_end);
2552177633Sdfr			if (!LIST_EMPTY(&blk->lf_inedges))
255322521Sdyson				panic("lf_printlist: bad list");
255422521Sdyson		}
255522521Sdyson		printf("\n");
25561960Sdg	}
25571960Sdg}
25581960Sdg#endif /* LOCKF_DEBUG */
2559