uipc_mqueue.c revision 220245
1/*-
2 * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28/*
29 * POSIX message queue implementation.
30 *
 * 1) A mqueue filesystem can be mounted; each message queue then appears
 *    as a file in the mounted directory. Users can change a queue's
 *    permissions and ownership, or remove the queue. Manually creating a
 *    file in the directory creates a message queue in the kernel with the
 *    same name and the default message queue attributes. This method is
 *    not recommended, because the mq_open syscall lets the caller specify
 *    different attributes. The file system can also be mounted multiple
 *    times at different mount points, and every mount shows the same
 *    contents.
 *
 * 2) Standard POSIX message queue API. The syscalls do not go through the
 *    vfs layer but operate directly on the internal data structures, so
 *    the IPC facility can be used without mounting the mqueue file system.
43 */
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD: head/sys/kern/uipc_mqueue.c 220245 2011-04-01 13:28:34Z kib $");
47
48#include "opt_compat.h"
49
50#include <sys/param.h>
51#include <sys/kernel.h>
52#include <sys/systm.h>
53#include <sys/limits.h>
54#include <sys/buf.h>
55#include <sys/dirent.h>
56#include <sys/event.h>
57#include <sys/eventhandler.h>
58#include <sys/fcntl.h>
59#include <sys/file.h>
60#include <sys/filedesc.h>
61#include <sys/lock.h>
62#include <sys/malloc.h>
63#include <sys/module.h>
64#include <sys/mount.h>
65#include <sys/mqueue.h>
66#include <sys/mutex.h>
67#include <sys/namei.h>
68#include <sys/posix4.h>
69#include <sys/poll.h>
70#include <sys/priv.h>
71#include <sys/proc.h>
72#include <sys/queue.h>
73#include <sys/sysproto.h>
74#include <sys/stat.h>
75#include <sys/syscall.h>
76#include <sys/syscallsubr.h>
77#include <sys/sysent.h>
78#include <sys/sx.h>
79#include <sys/sysctl.h>
80#include <sys/taskqueue.h>
81#include <sys/unistd.h>
82#include <sys/vnode.h>
83#include <machine/atomic.h>
84
85FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
86
87/*
88 * Limits and constants
89 */
90#define	MQFS_NAMELEN		NAME_MAX
91#define MQFS_DELEN		(8 + MQFS_NAMELEN)
92
/*
 * Kinds of mqfs node.  Zero is reserved for "no type", which is what a
 * zero-filled node carries until a real type is stored in mn_type.
 */
typedef enum {
	mqfstype_none    = 0,	/* unset */
	mqfstype_root    = 1,	/* the root directory */
	mqfstype_dir     = 2,	/* an ordinary directory */
	mqfstype_this    = 3,	/* the "." entry of a directory */
	mqfstype_parent  = 4,	/* the ".." entry of a directory */
	mqfstype_file    = 5,	/* a regular file */
	mqfstype_symlink = 6,	/* a symbolic link */
} mqfs_type_t;
103
104struct mqfs_node;
105
106/*
107 * mqfs_info: describes a mqfs instance
108 */
109struct mqfs_info {
110	struct sx		mi_lock;
111	struct mqfs_node	*mi_root;
112	struct unrhdr		*mi_unrhdr;
113};
114
115struct mqfs_vdata {
116	LIST_ENTRY(mqfs_vdata)	mv_link;
117	struct mqfs_node	*mv_node;
118	struct vnode		*mv_vnode;
119	struct task		mv_task;
120};
121
122/*
123 * mqfs_node: describes a node (file or directory) within a mqfs
124 */
125struct mqfs_node {
126	char			mn_name[MQFS_NAMELEN+1];
127	struct mqfs_info	*mn_info;
128	struct mqfs_node	*mn_parent;
129	LIST_HEAD(,mqfs_node)	mn_children;
130	LIST_ENTRY(mqfs_node)	mn_sibling;
131	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
132	int			mn_refcount;
133	mqfs_type_t		mn_type;
134	int			mn_deleted;
135	uint32_t		mn_fileno;
136	void			*mn_data;
137	struct timespec		mn_birth;
138	struct timespec		mn_ctime;
139	struct timespec		mn_atime;
140	struct timespec		mn_mtime;
141	uid_t			mn_uid;
142	gid_t			mn_gid;
143	int			mn_mode;
144};
145
/* vnode -> mqfs node, through the mqfs_vdata stored in v_data */
#define	VTON(vp)	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue hanging off its mqfs node */
#define VTOMQ(vp) 	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> message queue; f_data is treated as a struct mqfs_node pointer */
#define	FPTOMQ(fp)	((struct mqueue *)(((struct mqfs_node *) \
				(fp)->f_data)->mn_data))
151
152TAILQ_HEAD(msgq, mqueue_msg);
153
154struct mqueue;
155
156struct mqueue_notifier {
157	LIST_ENTRY(mqueue_notifier)	nt_link;
158	struct sigevent			nt_sigev;
159	ksiginfo_t			nt_ksi;
160	struct proc			*nt_proc;
161};
162
163struct mqueue {
164	struct mtx	mq_mutex;
165	int		mq_flags;
166	long		mq_maxmsg;
167	long		mq_msgsize;
168	long		mq_curmsgs;
169	long		mq_totalbytes;
170	struct msgq	mq_msgq;
171	int		mq_receivers;
172	int		mq_senders;
173	struct selinfo	mq_rsel;
174	struct selinfo	mq_wsel;
175	struct mqueue_notifier	*mq_notifier;
176};
177
178#define	MQ_RSEL		0x01
179#define	MQ_WSEL		0x02
180
181struct mqueue_msg {
182	TAILQ_ENTRY(mqueue_msg)	msg_link;
183	unsigned int	msg_prio;
184	unsigned int	msg_size;
185	/* following real data... */
186};
187
188SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0,
189	"POSIX real time message queue");
190
191static int	default_maxmsg  = 10;
192static int	default_msgsize = 1024;
193
194static int	maxmsg = 100;
195SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
196    &maxmsg, 0, "Default maximum messages in queue");
197static int	maxmsgsize = 16384;
198SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
199    &maxmsgsize, 0, "Default maximum message size");
200static int	maxmq = 100;
201SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
202    &maxmq, 0, "maximum message queues");
203static int	curmq = 0;
204SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
205    &curmq, 0, "current message queue number");
206static int	unloadable = 0;
207static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");
208
209static eventhandler_tag exit_tag;
210
211/* Only one instance per-system */
212static struct mqfs_info		mqfs_data;
213static uma_zone_t		mqnode_zone;
214static uma_zone_t		mqueue_zone;
215static uma_zone_t		mvdata_zone;
216static uma_zone_t		mqnoti_zone;
217static struct vop_vector	mqfs_vnodeops;
218static struct fileops		mqueueops;
219
220/*
221 * Directory structure construction and manipulation
222 */
#ifdef notyet
/* Directory and symlink creation are not compiled in yet. */
static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
	const char *name, int namelen, struct ucred *cred, int mode);
static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
	const char *name, int namelen, struct ucred *cred, int mode);
#endif

/* Node creation and destruction. */
static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
	const char *name, int namelen, struct ucred *cred, int mode);
static int	mqfs_destroy(struct mqfs_node *mn);

/* File number management. */
static void	mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn);
static void	mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn);

/* vnode allocation for a node. */
static int	mqfs_allocv(struct mount *mp, struct vnode **vpp,
	struct mqfs_node *pn);
236
237/*
 * Message queue construction and manipulation
239 */
240static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
241static void	mqueue_free(struct mqueue *mq);
242static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
243			size_t msg_len, unsigned msg_prio, int waitok,
244			const struct timespec *abs_timeout);
245static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
246			size_t msg_len, unsigned *msg_prio, int waitok,
247			const struct timespec *abs_timeout);
248static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
249			int timo);
250static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
251			int timo);
252static void	mqueue_send_notification(struct mqueue *mq);
253static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
254static void	mq_proc_exit(void *arg, struct proc *p);
255
256/*
257 * kqueue filters
258 */
259static void	filt_mqdetach(struct knote *kn);
260static int	filt_mqread(struct knote *kn, long hint);
261static int	filt_mqwrite(struct knote *kn, long hint);
262
263struct filterops mq_rfiltops = {
264	.f_isfd = 1,
265	.f_detach = filt_mqdetach,
266	.f_event = filt_mqread,
267};
268struct filterops mq_wfiltops = {
269	.f_isfd = 1,
270	.f_detach = filt_mqdetach,
271	.f_event = filt_mqwrite,
272};
273
274/*
275 * Initialize fileno bitmap
276 */
277static void
278mqfs_fileno_init(struct mqfs_info *mi)
279{
280	struct unrhdr *up;
281
282	up = new_unrhdr(1, INT_MAX, NULL);
283	mi->mi_unrhdr = up;
284}
285
286/*
287 * Tear down fileno bitmap
288 */
289static void
290mqfs_fileno_uninit(struct mqfs_info *mi)
291{
292	struct unrhdr *up;
293
294	up = mi->mi_unrhdr;
295	mi->mi_unrhdr = NULL;
296	delete_unrhdr(up);
297}
298
299/*
300 * Allocate a file number
301 */
302static void
303mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
304{
305	/* make sure our parent has a file number */
306	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
307		mqfs_fileno_alloc(mi, mn->mn_parent);
308
309	switch (mn->mn_type) {
310	case mqfstype_root:
311	case mqfstype_dir:
312	case mqfstype_file:
313	case mqfstype_symlink:
314		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
315		break;
316	case mqfstype_this:
317		KASSERT(mn->mn_parent != NULL,
318		    ("mqfstype_this node has no parent"));
319		mn->mn_fileno = mn->mn_parent->mn_fileno;
320		break;
321	case mqfstype_parent:
322		KASSERT(mn->mn_parent != NULL,
323		    ("mqfstype_parent node has no parent"));
324		if (mn->mn_parent == mi->mi_root) {
325			mn->mn_fileno = mn->mn_parent->mn_fileno;
326			break;
327		}
328		KASSERT(mn->mn_parent->mn_parent != NULL,
329		    ("mqfstype_parent node has no grandparent"));
330		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
331		break;
332	default:
333		KASSERT(0,
334		    ("mqfs_fileno_alloc() called for unknown type node: %d",
335			mn->mn_type));
336		break;
337	}
338}
339
340/*
341 * Release a file number
342 */
343static void
344mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
345{
346	switch (mn->mn_type) {
347	case mqfstype_root:
348	case mqfstype_dir:
349	case mqfstype_file:
350	case mqfstype_symlink:
351		free_unr(mi->mi_unrhdr, mn->mn_fileno);
352		break;
353	case mqfstype_this:
354	case mqfstype_parent:
355		/* ignore these, as they don't "own" their file number */
356		break;
357	default:
358		KASSERT(0,
359		    ("mqfs_fileno_free() called for unknown type node: %d",
360			mn->mn_type));
361		break;
362	}
363}
364
365static __inline struct mqfs_node *
366mqnode_alloc(void)
367{
368	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
369}
370
371static __inline void
372mqnode_free(struct mqfs_node *node)
373{
374	uma_zfree(mqnode_zone, node);
375}
376
377static __inline void
378mqnode_addref(struct mqfs_node *node)
379{
380	atomic_fetchadd_int(&node->mn_refcount, 1);
381}
382
383static __inline void
384mqnode_release(struct mqfs_node *node)
385{
386	struct mqfs_info *mqfs;
387	int old, exp;
388
389	mqfs = node->mn_info;
390	old = atomic_fetchadd_int(&node->mn_refcount, -1);
391	if (node->mn_type == mqfstype_dir ||
392	    node->mn_type == mqfstype_root)
393		exp = 3; /* include . and .. */
394	else
395		exp = 1;
396	if (old == exp) {
397		int locked = sx_xlocked(&mqfs->mi_lock);
398		if (!locked)
399			sx_xlock(&mqfs->mi_lock);
400		mqfs_destroy(node);
401		if (!locked)
402			sx_xunlock(&mqfs->mi_lock);
403	}
404}
405
406/*
407 * Add a node to a directory
408 */
409static int
410mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
411{
412	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
413	KASSERT(parent->mn_info != NULL,
414	    ("%s(): parent has no mn_info", __func__));
415	KASSERT(parent->mn_type == mqfstype_dir ||
416	    parent->mn_type == mqfstype_root,
417	    ("%s(): parent is not a directory", __func__));
418
419	node->mn_info = parent->mn_info;
420	node->mn_parent = parent;
421	LIST_INIT(&node->mn_children);
422	LIST_INIT(&node->mn_vnodes);
423	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
424	mqnode_addref(parent);
425	return (0);
426}
427
428static struct mqfs_node *
429mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
430	int nodetype)
431{
432	struct mqfs_node *node;
433
434	node = mqnode_alloc();
435	strncpy(node->mn_name, name, namelen);
436	node->mn_type = nodetype;
437	node->mn_refcount = 1;
438	vfs_timestamp(&node->mn_birth);
439	node->mn_ctime = node->mn_atime = node->mn_mtime
440		= node->mn_birth;
441	node->mn_uid = cred->cr_uid;
442	node->mn_gid = cred->cr_gid;
443	node->mn_mode = mode;
444	return (node);
445}
446
447/*
448 * Create a file
449 */
450static struct mqfs_node *
451mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
452	struct ucred *cred, int mode)
453{
454	struct mqfs_node *node;
455
456	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
457	if (mqfs_add_node(parent, node) != 0) {
458		mqnode_free(node);
459		return (NULL);
460	}
461	return (node);
462}
463
464/*
465 * Add . and .. to a directory
466 */
467static int
468mqfs_fixup_dir(struct mqfs_node *parent)
469{
470	struct mqfs_node *dir;
471
472	dir = mqnode_alloc();
473	dir->mn_name[0] = '.';
474	dir->mn_type = mqfstype_this;
475	dir->mn_refcount = 1;
476	if (mqfs_add_node(parent, dir) != 0) {
477		mqnode_free(dir);
478		return (-1);
479	}
480
481	dir = mqnode_alloc();
482	dir->mn_name[0] = dir->mn_name[1] = '.';
483	dir->mn_type = mqfstype_parent;
484	dir->mn_refcount = 1;
485
486	if (mqfs_add_node(parent, dir) != 0) {
487		mqnode_free(dir);
488		return (-1);
489	}
490
491	return (0);
492}
493
494#ifdef notyet
495
496/*
497 * Create a directory
498 */
499static struct mqfs_node *
500mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
501	struct ucred *cred, int mode)
502{
503	struct mqfs_node *node;
504
505	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
506	if (mqfs_add_node(parent, node) != 0) {
507		mqnode_free(node);
508		return (NULL);
509	}
510
511	if (mqfs_fixup_dir(node) != 0) {
512		mqfs_destroy(node);
513		return (NULL);
514	}
515	return (node);
516}
517
518/*
519 * Create a symlink
520 */
521static struct mqfs_node *
522mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
523	struct ucred *cred, int mode)
524{
525	struct mqfs_node *node;
526
527	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
528	if (mqfs_add_node(parent, node) != 0) {
529		mqnode_free(node);
530		return (NULL);
531	}
532	return (node);
533}
534
535#endif
536
537/*
538 * Destroy a node or a tree of nodes
539 */
540static int
541mqfs_destroy(struct mqfs_node *node)
542{
543	struct mqfs_node *parent;
544
545	KASSERT(node != NULL,
546	    ("%s(): node is NULL", __func__));
547	KASSERT(node->mn_info != NULL,
548	    ("%s(): node has no mn_info", __func__));
549
550	/* destroy children */
551	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
552		while (! LIST_EMPTY(&node->mn_children))
553			mqfs_destroy(LIST_FIRST(&node->mn_children));
554
555	/* unlink from parent */
556	if ((parent = node->mn_parent) != NULL) {
557		KASSERT(parent->mn_info == node->mn_info,
558		    ("%s(): parent has different mn_info", __func__));
559		LIST_REMOVE(node, mn_sibling);
560	}
561
562	if (node->mn_fileno != 0)
563		mqfs_fileno_free(node->mn_info, node);
564	if (node->mn_data != NULL)
565		mqueue_free(node->mn_data);
566	mqnode_free(node);
567	return (0);
568}
569
570/*
571 * Mount a mqfs instance
572 */
573static int
574mqfs_mount(struct mount *mp)
575{
576	struct statfs *sbp;
577
578	if (mp->mnt_flag & MNT_UPDATE)
579		return (EOPNOTSUPP);
580
581	mp->mnt_data = &mqfs_data;
582	MNT_ILOCK(mp);
583	mp->mnt_flag |= MNT_LOCAL;
584	mp->mnt_kern_flag |= MNTK_MPSAFE;
585	MNT_IUNLOCK(mp);
586	vfs_getnewfsid(mp);
587
588	sbp = &mp->mnt_stat;
589	vfs_mountedfrom(mp, "mqueue");
590	sbp->f_bsize = PAGE_SIZE;
591	sbp->f_iosize = PAGE_SIZE;
592	sbp->f_blocks = 1;
593	sbp->f_bfree = 0;
594	sbp->f_bavail = 0;
595	sbp->f_files = 1;
596	sbp->f_ffree = 0;
597	return (0);
598}
599
600/*
601 * Unmount a mqfs instance
602 */
603static int
604mqfs_unmount(struct mount *mp, int mntflags)
605{
606	int error;
607
608	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
609	    curthread);
610	return (error);
611}
612
613/*
614 * Return a root vnode
615 */
616static int
617mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
618{
619	struct mqfs_info *mqfs;
620	int ret;
621
622	mqfs = VFSTOMQFS(mp);
623	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
624	return (ret);
625}
626
/*
 * Return filesystem statistics.  Nothing is refreshed here; sbp is left
 * untouched and 0 is returned.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp)
{

	/* XXX update statistics */
	return (0);
}
636
637/*
638 * Initialize a mqfs instance
639 */
640static int
641mqfs_init(struct vfsconf *vfc)
642{
643	struct mqfs_node *root;
644	struct mqfs_info *mi;
645
646	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
647		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
648	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
649		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
650	mvdata_zone = uma_zcreate("mvdata",
651		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
652		NULL, UMA_ALIGN_PTR, 0);
653	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
654		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
655	mi = &mqfs_data;
656	sx_init(&mi->mi_lock, "mqfs lock");
657	/* set up the root diretory */
658	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
659		mqfstype_root);
660	root->mn_info = mi;
661	LIST_INIT(&root->mn_children);
662	LIST_INIT(&root->mn_vnodes);
663	mi->mi_root = root;
664	mqfs_fileno_init(mi);
665	mqfs_fileno_alloc(mi, root);
666	mqfs_fixup_dir(root);
667	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
668	    EVENTHANDLER_PRI_ANY);
669	mq_fdclose = mqueue_fdclose;
670	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
671	return (0);
672}
673
674/*
675 * Destroy a mqfs instance
676 */
677static int
678mqfs_uninit(struct vfsconf *vfc)
679{
680	struct mqfs_info *mi;
681
682	if (!unloadable)
683		return (EOPNOTSUPP);
684	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
685	mi = &mqfs_data;
686	mqfs_destroy(mi->mi_root);
687	mi->mi_root = NULL;
688	mqfs_fileno_uninit(mi);
689	sx_destroy(&mi->mi_lock);
690	uma_zdestroy(mqnode_zone);
691	uma_zdestroy(mqueue_zone);
692	uma_zdestroy(mvdata_zone);
693	uma_zdestroy(mqnoti_zone);
694	return (0);
695}
696
697/*
698 * task routine
699 */
700static void
701do_recycle(void *context, int pending __unused)
702{
703	struct vnode *vp = (struct vnode *)context;
704
705	vrecycle(vp, curthread);
706	vdrop(vp);
707}
708
709/*
710 * Allocate a vnode
711 */
712static int
713mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
714{
715	struct mqfs_vdata *vd;
716	struct mqfs_info  *mqfs;
717	struct vnode *newvpp;
718	int error;
719
720	mqfs = pn->mn_info;
721	*vpp = NULL;
722	sx_xlock(&mqfs->mi_lock);
723	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
724		if (vd->mv_vnode->v_mount == mp) {
725			vhold(vd->mv_vnode);
726			break;
727		}
728	}
729
730	if (vd != NULL) {
731found:
732		*vpp = vd->mv_vnode;
733		sx_xunlock(&mqfs->mi_lock);
734		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
735		vdrop(*vpp);
736		return (error);
737	}
738	sx_xunlock(&mqfs->mi_lock);
739
740	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
741	if (error)
742		return (error);
743	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
744	error = insmntque(newvpp, mp);
745	if (error != 0)
746		return (error);
747
748	sx_xlock(&mqfs->mi_lock);
749	/*
750	 * Check if it has already been allocated
751	 * while we were blocked.
752	 */
753	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
754		if (vd->mv_vnode->v_mount == mp) {
755			vhold(vd->mv_vnode);
756			sx_xunlock(&mqfs->mi_lock);
757
758			vgone(newvpp);
759			vput(newvpp);
760			goto found;
761		}
762	}
763
764	*vpp = newvpp;
765
766	vd = uma_zalloc(mvdata_zone, M_WAITOK);
767	(*vpp)->v_data = vd;
768	vd->mv_vnode = *vpp;
769	vd->mv_node = pn;
770	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
771	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
772	mqnode_addref(pn);
773	switch (pn->mn_type) {
774	case mqfstype_root:
775		(*vpp)->v_vflag = VV_ROOT;
776		/* fall through */
777	case mqfstype_dir:
778	case mqfstype_this:
779	case mqfstype_parent:
780		(*vpp)->v_type = VDIR;
781		break;
782	case mqfstype_file:
783		(*vpp)->v_type = VREG;
784		break;
785	case mqfstype_symlink:
786		(*vpp)->v_type = VLNK;
787		break;
788	case mqfstype_none:
789		KASSERT(0, ("mqfs_allocf called for null node\n"));
790	default:
791		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
792	}
793	sx_xunlock(&mqfs->mi_lock);
794	return (0);
795}
796
797/*
798 * Search a directory entry
799 */
800static struct mqfs_node *
801mqfs_search(struct mqfs_node *pd, const char *name, int len)
802{
803	struct mqfs_node *pn;
804
805	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
806	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
807		if (strncmp(pn->mn_name, name, len) == 0 &&
808		    pn->mn_name[len] == '\0')
809			return (pn);
810	}
811	return (NULL);
812}
813
814/*
815 * Look up a file or directory.
816 */
817static int
818mqfs_lookupx(struct vop_cachedlookup_args *ap)
819{
820	struct componentname *cnp;
821	struct vnode *dvp, **vpp;
822	struct mqfs_node *pd;
823	struct mqfs_node *pn;
824	struct mqfs_info *mqfs;
825	int nameiop, flags, error, namelen;
826	char *pname;
827	struct thread *td;
828
829	cnp = ap->a_cnp;
830	vpp = ap->a_vpp;
831	dvp = ap->a_dvp;
832	pname = cnp->cn_nameptr;
833	namelen = cnp->cn_namelen;
834	td = cnp->cn_thread;
835	flags = cnp->cn_flags;
836	nameiop = cnp->cn_nameiop;
837	pd = VTON(dvp);
838	pn = NULL;
839	mqfs = pd->mn_info;
840	*vpp = NULLVP;
841
842	if (dvp->v_type != VDIR)
843		return (ENOTDIR);
844
845	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
846	if (error)
847		return (error);
848
849	/* shortcut: check if the name is too long */
850	if (cnp->cn_namelen >= MQFS_NAMELEN)
851		return (ENOENT);
852
853	/* self */
854	if (namelen == 1 && pname[0] == '.') {
855		if ((flags & ISLASTCN) && nameiop != LOOKUP)
856			return (EINVAL);
857		pn = pd;
858		*vpp = dvp;
859		VREF(dvp);
860		return (0);
861	}
862
863	/* parent */
864	if (cnp->cn_flags & ISDOTDOT) {
865		if (dvp->v_vflag & VV_ROOT)
866			return (EIO);
867		if ((flags & ISLASTCN) && nameiop != LOOKUP)
868			return (EINVAL);
869		VOP_UNLOCK(dvp, 0);
870		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
871		pn = pd->mn_parent;
872		error = mqfs_allocv(dvp->v_mount, vpp, pn);
873		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
874		return (error);
875	}
876
877	/* named node */
878	sx_xlock(&mqfs->mi_lock);
879	pn = mqfs_search(pd, pname, namelen);
880	if (pn != NULL)
881		mqnode_addref(pn);
882	sx_xunlock(&mqfs->mi_lock);
883
884	/* found */
885	if (pn != NULL) {
886		/* DELETE */
887		if (nameiop == DELETE && (flags & ISLASTCN)) {
888			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
889			if (error) {
890				mqnode_release(pn);
891				return (error);
892			}
893			if (*vpp == dvp) {
894				VREF(dvp);
895				*vpp = dvp;
896				mqnode_release(pn);
897				return (0);
898			}
899		}
900
901		/* allocate vnode */
902		error = mqfs_allocv(dvp->v_mount, vpp, pn);
903		mqnode_release(pn);
904		if (error == 0 && cnp->cn_flags & MAKEENTRY)
905			cache_enter(dvp, *vpp, cnp);
906		return (error);
907	}
908
909	/* not found */
910
911	/* will create a new entry in the directory ? */
912	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
913	    && (flags & ISLASTCN)) {
914		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
915		if (error)
916			return (error);
917		cnp->cn_flags |= SAVENAME;
918		return (EJUSTRETURN);
919	}
920	return (ENOENT);
921}
922
923#if 0
924struct vop_lookup_args {
925	struct vop_generic_args a_gen;
926	struct vnode *a_dvp;
927	struct vnode **a_vpp;
928	struct componentname *a_cnp;
929};
930#endif
931
/*
 * vnode lookup operation: a thin wrapper that hands the request to
 * mqfs_lookupx() and returns its result unchanged.
 */
static int
mqfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (mqfs_lookupx(ap));
}
943
944#if 0
945struct vop_create_args {
946	struct vnode *a_dvp;
947	struct vnode **a_vpp;
948	struct componentname *a_cnp;
949	struct vattr *a_vap;
950};
951#endif
952
953/*
954 * vnode creation operation
955 */
956static int
957mqfs_create(struct vop_create_args *ap)
958{
959	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
960	struct componentname *cnp = ap->a_cnp;
961	struct mqfs_node *pd;
962	struct mqfs_node *pn;
963	struct mqueue *mq;
964	int error;
965
966	pd = VTON(ap->a_dvp);
967	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
968		return (ENOTDIR);
969	mq = mqueue_alloc(NULL);
970	if (mq == NULL)
971		return (EAGAIN);
972	sx_xlock(&mqfs->mi_lock);
973	if ((cnp->cn_flags & HASBUF) == 0)
974		panic("%s: no name", __func__);
975	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
976		cnp->cn_cred, ap->a_vap->va_mode);
977	if (pn == NULL) {
978		sx_xunlock(&mqfs->mi_lock);
979		error = ENOSPC;
980	} else {
981		mqnode_addref(pn);
982		sx_xunlock(&mqfs->mi_lock);
983		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
984		mqnode_release(pn);
985		if (error)
986			mqfs_destroy(pn);
987		else
988			pn->mn_data = mq;
989	}
990	if (error)
991		mqueue_free(mq);
992	return (error);
993}
994
995/*
996 * Remove an entry
997 */
998static
999int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
1000{
1001	struct mqfs_node *parent;
1002	struct mqfs_vdata *vd;
1003	int error = 0;
1004
1005	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
1006
1007	if (ucred->cr_uid != pn->mn_uid &&
1008	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN, 0)) != 0)
1009		error = EACCES;
1010	else if (!pn->mn_deleted) {
1011		parent = pn->mn_parent;
1012		pn->mn_parent = NULL;
1013		pn->mn_deleted = 1;
1014		LIST_REMOVE(pn, mn_sibling);
1015		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
1016			cache_purge(vd->mv_vnode);
1017			vhold(vd->mv_vnode);
1018			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
1019		}
1020		mqnode_release(pn);
1021		mqnode_release(parent);
1022	} else
1023		error = ENOENT;
1024	return (error);
1025}
1026
1027#if 0
1028struct vop_remove_args {
1029	struct vnode *a_dvp;
1030	struct vnode *a_vp;
1031	struct componentname *a_cnp;
1032};
1033#endif
1034
1035/*
1036 * vnode removal operation
1037 */
1038static int
1039mqfs_remove(struct vop_remove_args *ap)
1040{
1041	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1042	struct mqfs_node *pn;
1043	int error;
1044
1045	if (ap->a_vp->v_type == VDIR)
1046                return (EPERM);
1047	pn = VTON(ap->a_vp);
1048	sx_xlock(&mqfs->mi_lock);
1049	error = do_unlink(pn, ap->a_cnp->cn_cred);
1050	sx_xunlock(&mqfs->mi_lock);
1051	return (error);
1052}
1053
1054#if 0
1055struct vop_inactive_args {
1056	struct vnode *a_vp;
1057	struct thread *a_td;
1058};
1059#endif
1060
1061static int
1062mqfs_inactive(struct vop_inactive_args *ap)
1063{
1064	struct mqfs_node *pn = VTON(ap->a_vp);
1065
1066	if (pn->mn_deleted)
1067		vrecycle(ap->a_vp, ap->a_td);
1068	return (0);
1069}
1070
1071#if 0
1072struct vop_reclaim_args {
1073	struct vop_generic_args a_gen;
1074	struct vnode *a_vp;
1075	struct thread *a_td;
1076};
1077#endif
1078
1079static int
1080mqfs_reclaim(struct vop_reclaim_args *ap)
1081{
1082	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1083	struct vnode *vp = ap->a_vp;
1084	struct mqfs_node *pn;
1085	struct mqfs_vdata *vd;
1086
1087	vd = vp->v_data;
1088	pn = vd->mv_node;
1089	sx_xlock(&mqfs->mi_lock);
1090	vp->v_data = NULL;
1091	LIST_REMOVE(vd, mv_link);
1092	uma_zfree(mvdata_zone, vd);
1093	mqnode_release(pn);
1094	sx_xunlock(&mqfs->mi_lock);
1095	return (0);
1096}
1097
1098#if 0
1099struct vop_open_args {
1100	struct vop_generic_args a_gen;
1101	struct vnode *a_vp;
1102	int a_mode;
1103	struct ucred *a_cred;
1104	struct thread *a_td;
1105	struct file *a_fp;
1106};
1107#endif
1108
/*
 * vnode open operation: nothing to do, always succeeds.
 */
static int
mqfs_open(struct vop_open_args *ap)
{

	return (0);
}
1114
1115#if 0
1116struct vop_close_args {
1117	struct vop_generic_args a_gen;
1118	struct vnode *a_vp;
1119	int a_fflag;
1120	struct ucred *a_cred;
1121	struct thread *a_td;
1122};
1123#endif
1124
/*
 * vnode close operation: nothing to do, always succeeds.
 */
static int
mqfs_close(struct vop_close_args *ap)
{

	return (0);
}
1130
1131#if 0
1132struct vop_access_args {
1133	struct vop_generic_args a_gen;
1134	struct vnode *a_vp;
1135	accmode_t a_accmode;
1136	struct ucred *a_cred;
1137	struct thread *a_td;
1138};
1139#endif
1140
1141/*
1142 * Verify permissions
1143 */
1144static int
1145mqfs_access(struct vop_access_args *ap)
1146{
1147	struct vnode *vp = ap->a_vp;
1148	struct vattr vattr;
1149	int error;
1150
1151	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
1152	if (error)
1153		return (error);
1154	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid,
1155	    vattr.va_gid, ap->a_accmode, ap->a_cred, NULL);
1156	return (error);
1157}
1158
1159#if 0
1160struct vop_getattr_args {
1161	struct vop_generic_args a_gen;
1162	struct vnode *a_vp;
1163	struct vattr *a_vap;
1164	struct ucred *a_cred;
1165};
1166#endif
1167
1168/*
1169 * Get file attributes
1170 */
1171static int
1172mqfs_getattr(struct vop_getattr_args *ap)
1173{
1174	struct vnode *vp = ap->a_vp;
1175	struct mqfs_node *pn = VTON(vp);
1176	struct vattr *vap = ap->a_vap;
1177	int error = 0;
1178
1179	vap->va_type = vp->v_type;
1180	vap->va_mode = pn->mn_mode;
1181	vap->va_nlink = 1;
1182	vap->va_uid = pn->mn_uid;
1183	vap->va_gid = pn->mn_gid;
1184	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1185	vap->va_fileid = pn->mn_fileno;
1186	vap->va_size = 0;
1187	vap->va_blocksize = PAGE_SIZE;
1188	vap->va_bytes = vap->va_size = 0;
1189	vap->va_atime = pn->mn_atime;
1190	vap->va_mtime = pn->mn_mtime;
1191	vap->va_ctime = pn->mn_ctime;
1192	vap->va_birthtime = pn->mn_birth;
1193	vap->va_gen = 0;
1194	vap->va_flags = 0;
1195	vap->va_rdev = NODEV;
1196	vap->va_bytes = 0;
1197	vap->va_filerev = 0;
1198	return (error);
1199}
1200
1201#if 0
1202struct vop_setattr_args {
1203	struct vop_generic_args a_gen;
1204	struct vnode *a_vp;
1205	struct vattr *a_vap;
1206	struct ucred *a_cred;
1207};
1208#endif
1209/*
1210 * Set attributes
1211 */
1212static int
1213mqfs_setattr(struct vop_setattr_args *ap)
1214{
1215	struct mqfs_node *pn;
1216	struct vattr *vap;
1217	struct vnode *vp;
1218	struct thread *td;
1219	int c, error;
1220	uid_t uid;
1221	gid_t gid;
1222
1223	td = curthread;
1224	vap = ap->a_vap;
1225	vp = ap->a_vp;
1226	if ((vap->va_type != VNON) ||
1227	    (vap->va_nlink != VNOVAL) ||
1228	    (vap->va_fsid != VNOVAL) ||
1229	    (vap->va_fileid != VNOVAL) ||
1230	    (vap->va_blocksize != VNOVAL) ||
1231	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
1232	    (vap->va_rdev != VNOVAL) ||
1233	    ((int)vap->va_bytes != VNOVAL) ||
1234	    (vap->va_gen != VNOVAL)) {
1235		return (EINVAL);
1236	}
1237
1238	pn = VTON(vp);
1239
1240	error = c = 0;
1241	if (vap->va_uid == (uid_t)VNOVAL)
1242		uid = pn->mn_uid;
1243	else
1244		uid = vap->va_uid;
1245	if (vap->va_gid == (gid_t)VNOVAL)
1246		gid = pn->mn_gid;
1247	else
1248		gid = vap->va_gid;
1249
1250	if (uid != pn->mn_uid || gid != pn->mn_gid) {
1251		/*
1252		 * To modify the ownership of a file, must possess VADMIN
1253		 * for that file.
1254		 */
1255		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
1256			return (error);
1257
1258		/*
1259		 * XXXRW: Why is there a privilege check here: shouldn't the
1260		 * check in VOP_ACCESS() be enough?  Also, are the group bits
1261		 * below definitely right?
1262		 */
1263		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
1264		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
1265		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
1266			return (error);
1267		pn->mn_uid = uid;
1268		pn->mn_gid = gid;
1269		c = 1;
1270	}
1271
1272	if (vap->va_mode != (mode_t)VNOVAL) {
1273		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
1274		    (error = priv_check(td, PRIV_MQ_ADMIN)))
1275			return (error);
1276		pn->mn_mode = vap->va_mode;
1277		c = 1;
1278	}
1279
1280	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
1281		/* See the comment in ufs_vnops::ufs_setattr(). */
1282		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
1283		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
1284		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
1285			return (error);
1286		if (vap->va_atime.tv_sec != VNOVAL) {
1287			pn->mn_atime = vap->va_atime;
1288		}
1289		if (vap->va_mtime.tv_sec != VNOVAL) {
1290			pn->mn_mtime = vap->va_mtime;
1291		}
1292		c = 1;
1293	}
1294	if (c) {
1295		vfs_timestamp(&pn->mn_ctime);
1296	}
1297	return (0);
1298}
1299
1300#if 0
1301struct vop_read_args {
1302	struct vop_generic_args a_gen;
1303	struct vnode *a_vp;
1304	struct uio *a_uio;
1305	int a_ioflag;
1306	struct ucred *a_cred;
1307};
1308#endif
1309
1310/*
1311 * Read from a file
1312 */
1313static int
1314mqfs_read(struct vop_read_args *ap)
1315{
1316	char buf[80];
1317	struct vnode *vp = ap->a_vp;
1318	struct uio *uio = ap->a_uio;
1319	struct mqfs_node *pn;
1320	struct mqueue *mq;
1321	int len, error;
1322
1323	if (vp->v_type != VREG)
1324		return (EINVAL);
1325
1326	pn = VTON(vp);
1327	mq = VTOMQ(vp);
1328	snprintf(buf, sizeof(buf),
1329		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1330		mq->mq_totalbytes,
1331		mq->mq_maxmsg,
1332		mq->mq_curmsgs,
1333		mq->mq_msgsize);
1334	buf[sizeof(buf)-1] = '\0';
1335	len = strlen(buf);
1336	error = uiomove_frombuf(buf, len, uio);
1337	return (error);
1338}
1339
1340#if 0
1341struct vop_readdir_args {
1342	struct vop_generic_args a_gen;
1343	struct vnode *a_vp;
1344	struct uio *a_uio;
1345	struct ucred *a_cred;
1346	int *a_eofflag;
1347	int *a_ncookies;
1348	u_long **a_cookies;
1349};
1350#endif
1351
1352/*
1353 * Return directory entries.
1354 */
1355static int
1356mqfs_readdir(struct vop_readdir_args *ap)
1357{
1358	struct vnode *vp;
1359	struct mqfs_info *mi;
1360	struct mqfs_node *pd;
1361	struct mqfs_node *pn;
1362	struct dirent entry;
1363	struct uio *uio;
1364	int *tmp_ncookies = NULL;
1365	off_t offset;
1366	int error, i;
1367
1368	vp = ap->a_vp;
1369	mi = VFSTOMQFS(vp->v_mount);
1370	pd = VTON(vp);
1371	uio = ap->a_uio;
1372
1373	if (vp->v_type != VDIR)
1374		return (ENOTDIR);
1375
1376	if (uio->uio_offset < 0)
1377		return (EINVAL);
1378
1379	if (ap->a_ncookies != NULL) {
1380		tmp_ncookies = ap->a_ncookies;
1381		*ap->a_ncookies = 0;
1382		ap->a_ncookies = NULL;
1383        }
1384
1385	error = 0;
1386	offset = 0;
1387
1388	sx_xlock(&mi->mi_lock);
1389
1390	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1391		entry.d_reclen = sizeof(entry);
1392		if (!pn->mn_fileno)
1393			mqfs_fileno_alloc(mi, pn);
1394		entry.d_fileno = pn->mn_fileno;
1395		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1396			entry.d_name[i] = pn->mn_name[i];
1397		entry.d_name[i] = 0;
1398		entry.d_namlen = i;
1399		switch (pn->mn_type) {
1400		case mqfstype_root:
1401		case mqfstype_dir:
1402		case mqfstype_this:
1403		case mqfstype_parent:
1404			entry.d_type = DT_DIR;
1405			break;
1406		case mqfstype_file:
1407			entry.d_type = DT_REG;
1408			break;
1409		case mqfstype_symlink:
1410			entry.d_type = DT_LNK;
1411			break;
1412		default:
1413			panic("%s has unexpected node type: %d", pn->mn_name,
1414				pn->mn_type);
1415		}
1416		if (entry.d_reclen > uio->uio_resid)
1417                        break;
1418		if (offset >= uio->uio_offset) {
1419			error = vfs_read_dirent(ap, &entry, offset);
1420                        if (error)
1421                                break;
1422                }
1423                offset += entry.d_reclen;
1424	}
1425	sx_xunlock(&mi->mi_lock);
1426
1427	uio->uio_offset = offset;
1428
1429	if (tmp_ncookies != NULL)
1430		ap->a_ncookies = tmp_ncookies;
1431
1432	return (error);
1433}
1434
1435#ifdef notyet
1436
1437#if 0
1438struct vop_mkdir_args {
1439	struct vnode *a_dvp;
1440	struct vnode **a_vpp;
1441	struct componentname *a_cnp;
1442	struct vattr *a_vap;
1443};
1444#endif
1445
1446/*
1447 * Create a directory.
1448 */
1449static int
1450mqfs_mkdir(struct vop_mkdir_args *ap)
1451{
1452	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1453	struct componentname *cnp = ap->a_cnp;
1454	struct mqfs_node *pd = VTON(ap->a_dvp);
1455	struct mqfs_node *pn;
1456	int error;
1457
1458	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1459		return (ENOTDIR);
1460	sx_xlock(&mqfs->mi_lock);
1461	if ((cnp->cn_flags & HASBUF) == 0)
1462		panic("%s: no name", __func__);
1463	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
1464		ap->a_vap->cn_cred, ap->a_vap->va_mode);
1465	if (pn != NULL)
1466		mqnode_addref(pn);
1467	sx_xunlock(&mqfs->mi_lock);
1468	if (pn == NULL) {
1469		error = ENOSPC;
1470	} else {
1471		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1472		mqnode_release(pn);
1473	}
1474	return (error);
1475}
1476
1477#if 0
1478struct vop_rmdir_args {
1479	struct vnode *a_dvp;
1480	struct vnode *a_vp;
1481	struct componentname *a_cnp;
1482};
1483#endif
1484
1485/*
1486 * Remove a directory.
1487 */
1488static int
1489mqfs_rmdir(struct vop_rmdir_args *ap)
1490{
1491	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1492	struct mqfs_node *pn = VTON(ap->a_vp);
1493	struct mqfs_node *pt;
1494
1495	if (pn->mn_type != mqfstype_dir)
1496		return (ENOTDIR);
1497
1498	sx_xlock(&mqfs->mi_lock);
1499	if (pn->mn_deleted) {
1500		sx_xunlock(&mqfs->mi_lock);
1501		return (ENOENT);
1502	}
1503
1504	pt = LIST_FIRST(&pn->mn_children);
1505	pt = LIST_NEXT(pt, mn_sibling);
1506	pt = LIST_NEXT(pt, mn_sibling);
1507	if (pt != NULL) {
1508		sx_xunlock(&mqfs->mi_lock);
1509		return (ENOTEMPTY);
1510	}
1511	pt = pn->mn_parent;
1512	pn->mn_parent = NULL;
1513	pn->mn_deleted = 1;
1514	LIST_REMOVE(pn, mn_sibling);
1515	mqnode_release(pn);
1516	mqnode_release(pt);
1517	sx_xunlock(&mqfs->mi_lock);
1518	cache_purge(ap->a_vp);
1519	return (0);
1520}
1521
1522#endif /* notyet */
1523
1524/*
1525 * Allocate a message queue
1526 */
1527static struct mqueue *
1528mqueue_alloc(const struct mq_attr *attr)
1529{
1530	struct mqueue *mq;
1531
1532	if (curmq >= maxmq)
1533		return (NULL);
1534	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1535	TAILQ_INIT(&mq->mq_msgq);
1536	if (attr != NULL) {
1537		mq->mq_maxmsg = attr->mq_maxmsg;
1538		mq->mq_msgsize = attr->mq_msgsize;
1539	} else {
1540		mq->mq_maxmsg = default_maxmsg;
1541		mq->mq_msgsize = default_msgsize;
1542	}
1543	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
1544	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
1545	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
1546	atomic_add_int(&curmq, 1);
1547	return (mq);
1548}
1549
1550/*
1551 * Destroy a message queue
1552 */
1553static void
1554mqueue_free(struct mqueue *mq)
1555{
1556	struct mqueue_msg *msg;
1557
1558	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1559		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1560		free(msg, M_MQUEUEDATA);
1561	}
1562
1563	mtx_destroy(&mq->mq_mutex);
1564	knlist_destroy(&mq->mq_rsel.si_note);
1565	knlist_destroy(&mq->mq_wsel.si_note);
1566	uma_zfree(mqueue_zone, mq);
1567	atomic_add_int(&curmq, -1);
1568}
1569
1570/*
1571 * Load a message from user space
1572 */
1573static struct mqueue_msg *
1574mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1575{
1576	struct mqueue_msg *msg;
1577	size_t len;
1578	int error;
1579
1580	len = sizeof(struct mqueue_msg) + msg_size;
1581	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
1582	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1583	    msg_size);
1584	if (error) {
1585		free(msg, M_MQUEUEDATA);
1586		msg = NULL;
1587	} else {
1588		msg->msg_size = msg_size;
1589		msg->msg_prio = msg_prio;
1590	}
1591	return (msg);
1592}
1593
1594/*
1595 * Save a message to user space
1596 */
1597static int
1598mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1599{
1600	int error;
1601
1602	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1603		msg->msg_size);
1604	if (error == 0 && msg_prio != NULL)
1605		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1606	return (error);
1607}
1608
1609/*
1610 * Free a message's memory
1611 */
1612static __inline void
1613mqueue_freemsg(struct mqueue_msg *msg)
1614{
1615	free(msg, M_MQUEUEDATA);
1616}
1617
1618/*
1619 * Send a message. if waitok is false, thread will not be
1620 * blocked if there is no data in queue, otherwise, absolute
1621 * time will be checked.
1622 */
1623int
1624mqueue_send(struct mqueue *mq, const char *msg_ptr,
1625	size_t msg_len, unsigned msg_prio, int waitok,
1626	const struct timespec *abs_timeout)
1627{
1628	struct mqueue_msg *msg;
1629	struct timespec ts, ts2;
1630	struct timeval tv;
1631	int error;
1632
1633	if (msg_prio >= MQ_PRIO_MAX)
1634		return (EINVAL);
1635	if (msg_len > mq->mq_msgsize)
1636		return (EMSGSIZE);
1637	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
1638	if (msg == NULL)
1639		return (EFAULT);
1640
1641	/* O_NONBLOCK case */
1642	if (!waitok) {
1643		error = _mqueue_send(mq, msg, -1);
1644		if (error)
1645			goto bad;
1646		return (0);
1647	}
1648
1649	/* we allow a null timeout (wait forever) */
1650	if (abs_timeout == NULL) {
1651		error = _mqueue_send(mq, msg, 0);
1652		if (error)
1653			goto bad;
1654		return (0);
1655	}
1656
1657	/* send it before checking time */
1658	error = _mqueue_send(mq, msg, -1);
1659	if (error == 0)
1660		return (0);
1661
1662	if (error != EAGAIN)
1663		goto bad;
1664
1665	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1666		error = EINVAL;
1667		goto bad;
1668	}
1669	for (;;) {
1670		ts2 = *abs_timeout;
1671		getnanotime(&ts);
1672		timespecsub(&ts2, &ts);
1673		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1674			error = ETIMEDOUT;
1675			break;
1676		}
1677		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1678		error = _mqueue_send(mq, msg, tvtohz(&tv));
1679		if (error != ETIMEDOUT)
1680			break;
1681	}
1682	if (error == 0)
1683		return (0);
1684bad:
1685	mqueue_freemsg(msg);
1686	return (error);
1687}
1688
1689/*
1690 * Common routine to send a message
1691 */
1692static int
1693_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
1694{
1695	struct mqueue_msg *msg2;
1696	int error = 0;
1697
1698	mtx_lock(&mq->mq_mutex);
1699	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
1700		if (timo < 0) {
1701			mtx_unlock(&mq->mq_mutex);
1702			return (EAGAIN);
1703		}
1704		mq->mq_senders++;
1705		error = msleep(&mq->mq_senders, &mq->mq_mutex,
1706			    PCATCH, "mqsend", timo);
1707		mq->mq_senders--;
1708		if (error == EAGAIN)
1709			error = ETIMEDOUT;
1710	}
1711	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
1712		mtx_unlock(&mq->mq_mutex);
1713		return (error);
1714	}
1715	error = 0;
1716	if (TAILQ_EMPTY(&mq->mq_msgq)) {
1717		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
1718	} else {
1719		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
1720			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
1721		} else {
1722			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
1723				if (msg2->msg_prio < msg->msg_prio)
1724					break;
1725			}
1726			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
1727		}
1728	}
1729	mq->mq_curmsgs++;
1730	mq->mq_totalbytes += msg->msg_size;
1731	if (mq->mq_receivers)
1732		wakeup_one(&mq->mq_receivers);
1733	else if (mq->mq_notifier != NULL)
1734		mqueue_send_notification(mq);
1735	if (mq->mq_flags & MQ_RSEL) {
1736		mq->mq_flags &= ~MQ_RSEL;
1737		selwakeup(&mq->mq_rsel);
1738	}
1739	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
1740	mtx_unlock(&mq->mq_mutex);
1741	return (0);
1742}
1743
1744/*
1745 * Send realtime a signal to process which registered itself
1746 * successfully by mq_notify.
1747 */
1748static void
1749mqueue_send_notification(struct mqueue *mq)
1750{
1751	struct mqueue_notifier *nt;
1752	struct thread *td;
1753	struct proc *p;
1754	int error;
1755
1756	mtx_assert(&mq->mq_mutex, MA_OWNED);
1757	nt = mq->mq_notifier;
1758	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
1759		p = nt->nt_proc;
1760		error = sigev_findtd(p, &nt->nt_sigev, &td);
1761		if (error) {
1762			mq->mq_notifier = NULL;
1763			return;
1764		}
1765		if (!KSI_ONQ(&nt->nt_ksi)) {
1766			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
1767			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
1768		}
1769		PROC_UNLOCK(p);
1770	}
1771	mq->mq_notifier = NULL;
1772}
1773
1774/*
1775 * Get a message. if waitok is false, thread will not be
1776 * blocked if there is no data in queue, otherwise, absolute
1777 * time will be checked.
1778 */
1779int
1780mqueue_receive(struct mqueue *mq, char *msg_ptr,
1781	size_t msg_len, unsigned *msg_prio, int waitok,
1782	const struct timespec *abs_timeout)
1783{
1784	struct mqueue_msg *msg;
1785	struct timespec ts, ts2;
1786	struct timeval tv;
1787	int error;
1788
1789	if (msg_len < mq->mq_msgsize)
1790		return (EMSGSIZE);
1791
1792	/* O_NONBLOCK case */
1793	if (!waitok) {
1794		error = _mqueue_recv(mq, &msg, -1);
1795		if (error)
1796			return (error);
1797		goto received;
1798	}
1799
1800	/* we allow a null timeout (wait forever). */
1801	if (abs_timeout == NULL) {
1802		error = _mqueue_recv(mq, &msg, 0);
1803		if (error)
1804			return (error);
1805		goto received;
1806	}
1807
1808	/* try to get a message before checking time */
1809	error = _mqueue_recv(mq, &msg, -1);
1810	if (error == 0)
1811		goto received;
1812
1813	if (error != EAGAIN)
1814		return (error);
1815
1816	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1817		error = EINVAL;
1818		return (error);
1819	}
1820
1821	for (;;) {
1822		ts2 = *abs_timeout;
1823		getnanotime(&ts);
1824		timespecsub(&ts2, &ts);
1825		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1826			error = ETIMEDOUT;
1827			return (error);
1828		}
1829		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1830		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
1831		if (error == 0)
1832			break;
1833		if (error != ETIMEDOUT)
1834			return (error);
1835	}
1836
1837received:
1838	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
1839	if (error == 0) {
1840		curthread->td_retval[0] = msg->msg_size;
1841		curthread->td_retval[1] = 0;
1842	}
1843	mqueue_freemsg(msg);
1844	return (error);
1845}
1846
1847/*
1848 * Common routine to receive a message
1849 */
1850static int
1851_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
1852{
1853	int error = 0;
1854
1855	mtx_lock(&mq->mq_mutex);
1856	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
1857		if (timo < 0) {
1858			mtx_unlock(&mq->mq_mutex);
1859			return (EAGAIN);
1860		}
1861		mq->mq_receivers++;
1862		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
1863			    PCATCH, "mqrecv", timo);
1864		mq->mq_receivers--;
1865		if (error == EAGAIN)
1866			error = ETIMEDOUT;
1867	}
1868	if (*msg != NULL) {
1869		error = 0;
1870		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
1871		mq->mq_curmsgs--;
1872		mq->mq_totalbytes -= (*msg)->msg_size;
1873		if (mq->mq_senders)
1874			wakeup_one(&mq->mq_senders);
1875		if (mq->mq_flags & MQ_WSEL) {
1876			mq->mq_flags &= ~MQ_WSEL;
1877			selwakeup(&mq->mq_wsel);
1878		}
1879		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
1880	}
1881	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
1882	    !TAILQ_EMPTY(&mq->mq_msgq)) {
1883		mqueue_send_notification(mq);
1884	}
1885	mtx_unlock(&mq->mq_mutex);
1886	return (error);
1887}
1888
1889static __inline struct mqueue_notifier *
1890notifier_alloc(void)
1891{
1892	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
1893}
1894
1895static __inline void
1896notifier_free(struct mqueue_notifier *p)
1897{
1898	uma_zfree(mqnoti_zone, p);
1899}
1900
1901static struct mqueue_notifier *
1902notifier_search(struct proc *p, int fd)
1903{
1904	struct mqueue_notifier *nt;
1905
1906	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
1907		if (nt->nt_ksi.ksi_mqd == fd)
1908			break;
1909	}
1910	return (nt);
1911}
1912
1913static __inline void
1914notifier_insert(struct proc *p, struct mqueue_notifier *nt)
1915{
1916	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
1917}
1918
1919static __inline void
1920notifier_delete(struct proc *p, struct mqueue_notifier *nt)
1921{
1922	LIST_REMOVE(nt, nt_link);
1923	notifier_free(nt);
1924}
1925
1926static void
1927notifier_remove(struct proc *p, struct mqueue *mq, int fd)
1928{
1929	struct mqueue_notifier *nt;
1930
1931	mtx_assert(&mq->mq_mutex, MA_OWNED);
1932	PROC_LOCK(p);
1933	nt = notifier_search(p, fd);
1934	if (nt != NULL) {
1935		if (mq->mq_notifier == nt)
1936			mq->mq_notifier = NULL;
1937		sigqueue_take(&nt->nt_ksi);
1938		notifier_delete(p, nt);
1939	}
1940	PROC_UNLOCK(p);
1941}
1942
1943static int
1944kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
1945    const struct mq_attr *attr)
1946{
1947	char path[MQFS_NAMELEN + 1];
1948	struct mqfs_node *pn;
1949	struct filedesc *fdp;
1950	struct file *fp;
1951	struct mqueue *mq;
1952	int fd, error, len, cmode;
1953
1954	fdp = td->td_proc->p_fd;
1955	cmode = (((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT);
1956	mq = NULL;
1957	if ((flags & O_CREAT) != 0 && attr != NULL) {
1958		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
1959			return (EINVAL);
1960		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
1961			return (EINVAL);
1962	}
1963
1964	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
1965        if (error)
1966		return (error);
1967
1968	/*
1969	 * The first character of name must be a slash  (/) character
1970	 * and the remaining characters of name cannot include any slash
1971	 * characters.
1972	 */
1973	len = strlen(path);
1974	if (len < 2  || path[0] != '/' || index(path + 1, '/') != NULL)
1975		return (EINVAL);
1976
1977	error = falloc(td, &fp, &fd, 0);
1978	if (error)
1979		return (error);
1980
1981	sx_xlock(&mqfs_data.mi_lock);
1982	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
1983	if (pn == NULL) {
1984		if (!(flags & O_CREAT)) {
1985			error = ENOENT;
1986		} else {
1987			mq = mqueue_alloc(attr);
1988			if (mq == NULL) {
1989				error = ENFILE;
1990			} else {
1991				pn = mqfs_create_file(mqfs_data.mi_root,
1992				         path + 1, len - 1, td->td_ucred,
1993					 cmode);
1994				if (pn == NULL) {
1995					error = ENOSPC;
1996					mqueue_free(mq);
1997				}
1998			}
1999		}
2000
2001		if (error == 0) {
2002			pn->mn_data = mq;
2003		}
2004	} else {
2005		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
2006			error = EEXIST;
2007		} else {
2008			accmode_t accmode = 0;
2009
2010			if (flags & FREAD)
2011				accmode |= VREAD;
2012			if (flags & FWRITE)
2013				accmode |= VWRITE;
2014			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
2015				    pn->mn_gid, accmode, td->td_ucred, NULL);
2016		}
2017	}
2018
2019	if (error) {
2020		sx_xunlock(&mqfs_data.mi_lock);
2021		fdclose(fdp, fp, fd, td);
2022		fdrop(fp, td);
2023		return (error);
2024	}
2025
2026	mqnode_addref(pn);
2027	sx_xunlock(&mqfs_data.mi_lock);
2028
2029	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
2030	    &mqueueops);
2031
2032	FILEDESC_XLOCK(fdp);
2033	if (fdp->fd_ofiles[fd] == fp)
2034		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
2035	FILEDESC_XUNLOCK(fdp);
2036	td->td_retval[0] = fd;
2037	fdrop(fp, td);
2038	return (0);
2039}
2040
2041/*
2042 * Syscall to open a message queue.
2043 */
2044int
2045kmq_open(struct thread *td, struct kmq_open_args *uap)
2046{
2047	struct mq_attr attr;
2048	int flags, error;
2049
2050	if ((uap->flags & O_ACCMODE) == O_ACCMODE)
2051		return (EINVAL);
2052	flags = FFLAGS(uap->flags);
2053	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2054		error = copyin(uap->attr, &attr, sizeof(attr));
2055		if (error)
2056			return (error);
2057	}
2058	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2059	    uap->attr != NULL ? &attr : NULL));
2060}
2061
2062/*
2063 * Syscall to unlink a message queue.
2064 */
2065int
2066kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
2067{
2068	char path[MQFS_NAMELEN+1];
2069	struct mqfs_node *pn;
2070	int error, len;
2071
2072	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
2073        if (error)
2074		return (error);
2075
2076	len = strlen(path);
2077	if (len < 2  || path[0] != '/' || index(path + 1, '/') != NULL)
2078		return (EINVAL);
2079
2080	sx_xlock(&mqfs_data.mi_lock);
2081	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1);
2082	if (pn != NULL)
2083		error = do_unlink(pn, td->td_ucred);
2084	else
2085		error = ENOENT;
2086	sx_xunlock(&mqfs_data.mi_lock);
2087	return (error);
2088}
2089
2090typedef int (*_fgetf)(struct thread *, int, struct file **);
2091
2092/*
2093 * Get message queue by giving file slot
2094 */
2095static int
2096_getmq(struct thread *td, int fd, _fgetf func,
2097       struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
2098{
2099	struct mqfs_node *pn;
2100	int error;
2101
2102	error = func(td, fd, fpp);
2103	if (error)
2104		return (error);
2105	if (&mqueueops != (*fpp)->f_ops) {
2106		fdrop(*fpp, td);
2107		return (EBADF);
2108	}
2109	pn = (*fpp)->f_data;
2110	if (ppn)
2111		*ppn = pn;
2112	if (pmq)
2113		*pmq = pn->mn_data;
2114	return (0);
2115}
2116
2117static __inline int
2118getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2119	struct mqueue **pmq)
2120{
2121	return _getmq(td, fd, fget, fpp, ppn, pmq);
2122}
2123
2124static __inline int
2125getmq_read(struct thread *td, int fd, struct file **fpp,
2126	 struct mqfs_node **ppn, struct mqueue **pmq)
2127{
2128	return _getmq(td, fd, fget_read, fpp, ppn, pmq);
2129}
2130
2131static __inline int
2132getmq_write(struct thread *td, int fd, struct file **fpp,
2133	struct mqfs_node **ppn, struct mqueue **pmq)
2134{
2135	return _getmq(td, fd, fget_write, fpp, ppn, pmq);
2136}
2137
2138static int
2139kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
2140    struct mq_attr *oattr)
2141{
2142	struct mqueue *mq;
2143	struct file *fp;
2144	u_int oflag, flag;
2145	int error;
2146
2147	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
2148		return (EINVAL);
2149	error = getmq(td, mqd, &fp, NULL, &mq);
2150	if (error)
2151		return (error);
2152	oattr->mq_maxmsg  = mq->mq_maxmsg;
2153	oattr->mq_msgsize = mq->mq_msgsize;
2154	oattr->mq_curmsgs = mq->mq_curmsgs;
2155	if (attr != NULL) {
2156		do {
2157			oflag = flag = fp->f_flag;
2158			flag &= ~O_NONBLOCK;
2159			flag |= (attr->mq_flags & O_NONBLOCK);
2160		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
2161	} else
2162		oflag = fp->f_flag;
2163	oattr->mq_flags = (O_NONBLOCK & oflag);
2164	fdrop(fp, td);
2165	return (error);
2166}
2167
2168int
2169kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
2170{
2171	struct mq_attr attr, oattr;
2172	int error;
2173
2174	if (uap->attr != NULL) {
2175		error = copyin(uap->attr, &attr, sizeof(attr));
2176		if (error != 0)
2177			return (error);
2178	}
2179	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2180	    &oattr);
2181	if (error != 0)
2182		return (error);
2183	if (uap->oattr != NULL)
2184		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2185	return (error);
2186}
2187
2188int
2189kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
2190{
2191	struct mqueue *mq;
2192	struct file *fp;
2193	struct timespec *abs_timeout, ets;
2194	int error;
2195	int waitok;
2196
2197	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2198	if (error)
2199		return (error);
2200	if (uap->abs_timeout != NULL) {
2201		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2202		if (error != 0)
2203			return (error);
2204		abs_timeout = &ets;
2205	} else
2206		abs_timeout = NULL;
2207	waitok = !(fp->f_flag & O_NONBLOCK);
2208	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2209		uap->msg_prio, waitok, abs_timeout);
2210	fdrop(fp, td);
2211	return (error);
2212}
2213
2214int
2215kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
2216{
2217	struct mqueue *mq;
2218	struct file *fp;
2219	struct timespec *abs_timeout, ets;
2220	int error, waitok;
2221
2222	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2223	if (error)
2224		return (error);
2225	if (uap->abs_timeout != NULL) {
2226		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2227		if (error != 0)
2228			return (error);
2229		abs_timeout = &ets;
2230	} else
2231		abs_timeout = NULL;
2232	waitok = !(fp->f_flag & O_NONBLOCK);
2233	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2234		uap->msg_prio, waitok, abs_timeout);
2235	fdrop(fp, td);
2236	return (error);
2237}
2238
2239int
2240kmq_notify(struct thread *td, struct kmq_notify_args *uap)
2241{
2242	struct sigevent ev;
2243	struct filedesc *fdp;
2244	struct proc *p;
2245	struct mqueue *mq;
2246	struct file *fp;
2247	struct mqueue_notifier *nt, *newnt = NULL;
2248	int error;
2249
2250	p = td->td_proc;
2251	fdp = td->td_proc->p_fd;
2252	if (uap->sigev) {
2253		error = copyin(uap->sigev, &ev, sizeof(ev));
2254		if (error)
2255			return (error);
2256		if (ev.sigev_notify != SIGEV_SIGNAL &&
2257		    ev.sigev_notify != SIGEV_THREAD_ID &&
2258		    ev.sigev_notify != SIGEV_NONE)
2259			return (EINVAL);
2260		if ((ev.sigev_notify == SIGEV_SIGNAL ||
2261		     ev.sigev_notify == SIGEV_THREAD_ID) &&
2262			!_SIG_VALID(ev.sigev_signo))
2263			return (EINVAL);
2264	}
2265	error = getmq(td, uap->mqd, &fp, NULL, &mq);
2266	if (error)
2267		return (error);
2268again:
2269	FILEDESC_SLOCK(fdp);
2270	if (fget_locked(fdp, uap->mqd) != fp) {
2271		FILEDESC_SUNLOCK(fdp);
2272		error = EBADF;
2273		goto out;
2274	}
2275	mtx_lock(&mq->mq_mutex);
2276	FILEDESC_SUNLOCK(fdp);
2277	if (uap->sigev != NULL) {
2278		if (mq->mq_notifier != NULL) {
2279			error = EBUSY;
2280		} else {
2281			PROC_LOCK(p);
2282			nt = notifier_search(p, uap->mqd);
2283			if (nt == NULL) {
2284				if (newnt == NULL) {
2285					PROC_UNLOCK(p);
2286					mtx_unlock(&mq->mq_mutex);
2287					newnt = notifier_alloc();
2288					goto again;
2289				}
2290			}
2291
2292			if (nt != NULL) {
2293				sigqueue_take(&nt->nt_ksi);
2294				if (newnt != NULL) {
2295					notifier_free(newnt);
2296					newnt = NULL;
2297				}
2298			} else {
2299				nt = newnt;
2300				newnt = NULL;
2301				ksiginfo_init(&nt->nt_ksi);
2302				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
2303				nt->nt_ksi.ksi_code = SI_MESGQ;
2304				nt->nt_proc = p;
2305				nt->nt_ksi.ksi_mqd = uap->mqd;
2306				notifier_insert(p, nt);
2307			}
2308			nt->nt_sigev = ev;
2309			mq->mq_notifier = nt;
2310			PROC_UNLOCK(p);
2311			/*
2312			 * if there is no receivers and message queue
2313			 * is not empty, we should send notification
2314			 * as soon as possible.
2315			 */
2316			if (mq->mq_receivers == 0 &&
2317			    !TAILQ_EMPTY(&mq->mq_msgq))
2318				mqueue_send_notification(mq);
2319		}
2320	} else {
2321		notifier_remove(p, mq, uap->mqd);
2322	}
2323	mtx_unlock(&mq->mq_mutex);
2324
2325out:
2326	fdrop(fp, td);
2327	if (newnt != NULL)
2328		notifier_free(newnt);
2329	return (error);
2330}
2331
2332static void
2333mqueue_fdclose(struct thread *td, int fd, struct file *fp)
2334{
2335	struct filedesc *fdp;
2336	struct mqueue *mq;
2337
2338	fdp = td->td_proc->p_fd;
2339	FILEDESC_LOCK_ASSERT(fdp);
2340
2341	if (fp->f_ops == &mqueueops) {
2342		mq = FPTOMQ(fp);
2343		mtx_lock(&mq->mq_mutex);
2344		notifier_remove(td->td_proc, mq, fd);
2345
2346		/* have to wakeup thread in same process */
2347		if (mq->mq_flags & MQ_RSEL) {
2348			mq->mq_flags &= ~MQ_RSEL;
2349			selwakeup(&mq->mq_rsel);
2350		}
2351		if (mq->mq_flags & MQ_WSEL) {
2352			mq->mq_flags &= ~MQ_WSEL;
2353			selwakeup(&mq->mq_wsel);
2354		}
2355		mtx_unlock(&mq->mq_mutex);
2356	}
2357}
2358
2359static void
2360mq_proc_exit(void *arg __unused, struct proc *p)
2361{
2362	struct filedesc *fdp;
2363	struct file *fp;
2364	struct mqueue *mq;
2365	int i;
2366
2367	fdp = p->p_fd;
2368	FILEDESC_SLOCK(fdp);
2369	for (i = 0; i < fdp->fd_nfiles; ++i) {
2370		fp = fget_locked(fdp, i);
2371		if (fp != NULL && fp->f_ops == &mqueueops) {
2372			mq = FPTOMQ(fp);
2373			mtx_lock(&mq->mq_mutex);
2374			notifier_remove(p, FPTOMQ(fp), i);
2375			mtx_unlock(&mq->mq_mutex);
2376		}
2377	}
2378	FILEDESC_SUNLOCK(fdp);
2379	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
2380}
2381
/*
 * read(2) is not supported on a message queue descriptor.
 */
static int
mqf_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
2388
/*
 * write(2) is not supported on a message queue descriptor.
 */
static int
mqf_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
	int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
2395
/*
 * A message queue descriptor cannot be truncated; always EINVAL.
 */
static int
mqf_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

	return (EINVAL);
}
2403
2404static int
2405mqf_ioctl(struct file *fp, u_long cmd, void *data,
2406	struct ucred *active_cred, struct thread *td)
2407{
2408	return (ENOTTY);
2409}
2410
2411static int
2412mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2413	struct thread *td)
2414{
2415	struct mqueue *mq = FPTOMQ(fp);
2416	int revents = 0;
2417
2418	mtx_lock(&mq->mq_mutex);
2419	if (events & (POLLIN | POLLRDNORM)) {
2420		if (mq->mq_curmsgs) {
2421			revents |= events & (POLLIN | POLLRDNORM);
2422		} else {
2423			mq->mq_flags |= MQ_RSEL;
2424			selrecord(td, &mq->mq_rsel);
2425 		}
2426	}
2427	if (events & POLLOUT) {
2428		if (mq->mq_curmsgs < mq->mq_maxmsg)
2429			revents |= POLLOUT;
2430		else {
2431			mq->mq_flags |= MQ_WSEL;
2432			selrecord(td, &mq->mq_wsel);
2433		}
2434	}
2435	mtx_unlock(&mq->mq_mutex);
2436	return (revents);
2437}
2438
2439static int
2440mqf_close(struct file *fp, struct thread *td)
2441{
2442	struct mqfs_node *pn;
2443
2444	fp->f_ops = &badfileops;
2445	pn = fp->f_data;
2446	fp->f_data = NULL;
2447	sx_xlock(&mqfs_data.mi_lock);
2448	mqnode_release(pn);
2449	sx_xunlock(&mqfs_data.mi_lock);
2450	return (0);
2451}
2452
2453static int
2454mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2455	struct thread *td)
2456{
2457	struct mqfs_node *pn = fp->f_data;
2458
2459	bzero(st, sizeof *st);
2460	st->st_atim = pn->mn_atime;
2461	st->st_mtim = pn->mn_mtime;
2462	st->st_ctim = pn->mn_ctime;
2463	st->st_birthtim = pn->mn_birth;
2464	st->st_uid = pn->mn_uid;
2465	st->st_gid = pn->mn_gid;
2466	st->st_mode = S_IFIFO | pn->mn_mode;
2467	return (0);
2468}
2469
2470static int
2471mqf_kqfilter(struct file *fp, struct knote *kn)
2472{
2473	struct mqueue *mq = FPTOMQ(fp);
2474	int error = 0;
2475
2476	if (kn->kn_filter == EVFILT_READ) {
2477		kn->kn_fop = &mq_rfiltops;
2478		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2479	} else if (kn->kn_filter == EVFILT_WRITE) {
2480		kn->kn_fop = &mq_wfiltops;
2481		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2482	} else
2483		error = EINVAL;
2484	return (error);
2485}
2486
2487static void
2488filt_mqdetach(struct knote *kn)
2489{
2490	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2491
2492	if (kn->kn_filter == EVFILT_READ)
2493		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2494	else if (kn->kn_filter == EVFILT_WRITE)
2495		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2496	else
2497		panic("filt_mqdetach");
2498}
2499
2500static int
2501filt_mqread(struct knote *kn, long hint)
2502{
2503	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2504
2505	mtx_assert(&mq->mq_mutex, MA_OWNED);
2506	return (mq->mq_curmsgs != 0);
2507}
2508
2509static int
2510filt_mqwrite(struct knote *kn, long hint)
2511{
2512	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2513
2514	mtx_assert(&mq->mq_mutex, MA_OWNED);
2515	return (mq->mq_curmsgs < mq->mq_maxmsg);
2516}
2517
2518static struct fileops mqueueops = {
2519	.fo_read		= mqf_read,
2520	.fo_write		= mqf_write,
2521	.fo_truncate		= mqf_truncate,
2522	.fo_ioctl		= mqf_ioctl,
2523	.fo_poll		= mqf_poll,
2524	.fo_kqfilter		= mqf_kqfilter,
2525	.fo_stat		= mqf_stat,
2526	.fo_close		= mqf_close
2527};
2528
2529static struct vop_vector mqfs_vnodeops = {
2530	.vop_default 		= &default_vnodeops,
2531	.vop_access		= mqfs_access,
2532	.vop_cachedlookup	= mqfs_lookup,
2533	.vop_lookup		= vfs_cache_lookup,
2534	.vop_reclaim		= mqfs_reclaim,
2535	.vop_create		= mqfs_create,
2536	.vop_remove		= mqfs_remove,
2537	.vop_inactive		= mqfs_inactive,
2538	.vop_open		= mqfs_open,
2539	.vop_close		= mqfs_close,
2540	.vop_getattr		= mqfs_getattr,
2541	.vop_setattr		= mqfs_setattr,
2542	.vop_read		= mqfs_read,
2543	.vop_write		= VOP_EOPNOTSUPP,
2544	.vop_readdir		= mqfs_readdir,
2545	.vop_mkdir		= VOP_EOPNOTSUPP,
2546	.vop_rmdir		= VOP_EOPNOTSUPP
2547};
2548
2549static struct vfsops mqfs_vfsops = {
2550	.vfs_init 		= mqfs_init,
2551	.vfs_uninit		= mqfs_uninit,
2552	.vfs_mount		= mqfs_mount,
2553	.vfs_unmount		= mqfs_unmount,
2554	.vfs_root		= mqfs_root,
2555	.vfs_statfs		= mqfs_statfs,
2556};
2557
2558static struct vfsconf mqueuefs_vfsconf = {
2559	.vfc_version = VFS_VERSION,
2560	.vfc_name = "mqueuefs",
2561	.vfc_vfsops = &mqfs_vfsops,
2562	.vfc_typenum = -1,
2563	.vfc_flags = VFCF_SYNTHETIC
2564};
2565
2566static struct syscall_helper_data mq_syscalls[] = {
2567	SYSCALL_INIT_HELPER(kmq_open),
2568	SYSCALL_INIT_HELPER(kmq_setattr),
2569	SYSCALL_INIT_HELPER(kmq_timedsend),
2570	SYSCALL_INIT_HELPER(kmq_timedreceive),
2571	SYSCALL_INIT_HELPER(kmq_notify),
2572	SYSCALL_INIT_HELPER(kmq_unlink),
2573	SYSCALL_INIT_LAST
2574};
2575
2576#ifdef COMPAT_FREEBSD32
2577#include <compat/freebsd32/freebsd32.h>
2578#include <compat/freebsd32/freebsd32_proto.h>
2579#include <compat/freebsd32/freebsd32_syscall.h>
2580#include <compat/freebsd32/freebsd32_util.h>
2581
2582static void
2583mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
2584{
2585
2586	to->mq_flags = from->mq_flags;
2587	to->mq_maxmsg = from->mq_maxmsg;
2588	to->mq_msgsize = from->mq_msgsize;
2589	to->mq_curmsgs = from->mq_curmsgs;
2590}
2591
2592static void
2593mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
2594{
2595
2596	to->mq_flags = from->mq_flags;
2597	to->mq_maxmsg = from->mq_maxmsg;
2598	to->mq_msgsize = from->mq_msgsize;
2599	to->mq_curmsgs = from->mq_curmsgs;
2600}
2601
2602int
2603freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
2604{
2605	struct mq_attr attr;
2606	struct mq_attr32 attr32;
2607	int flags, error;
2608
2609	if ((uap->flags & O_ACCMODE) == O_ACCMODE)
2610		return (EINVAL);
2611	flags = FFLAGS(uap->flags);
2612	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2613		error = copyin(uap->attr, &attr32, sizeof(attr32));
2614		if (error)
2615			return (error);
2616		mq_attr_from32(&attr32, &attr);
2617	}
2618	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2619	    uap->attr != NULL ? &attr : NULL));
2620}
2621
2622int
2623freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
2624{
2625	struct mq_attr attr, oattr;
2626	struct mq_attr32 attr32, oattr32;
2627	int error;
2628
2629	if (uap->attr != NULL) {
2630		error = copyin(uap->attr, &attr32, sizeof(attr32));
2631		if (error != 0)
2632			return (error);
2633		mq_attr_from32(&attr32, &attr);
2634	}
2635	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2636	    &oattr);
2637	if (error != 0)
2638		return (error);
2639	if (uap->oattr != NULL) {
2640		mq_attr_to32(&oattr, &oattr32);
2641		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
2642	}
2643	return (error);
2644}
2645
2646int
2647freebsd32_kmq_timedsend(struct thread *td,
2648    struct freebsd32_kmq_timedsend_args *uap)
2649{
2650	struct mqueue *mq;
2651	struct file *fp;
2652	struct timespec32 ets32;
2653	struct timespec *abs_timeout, ets;
2654	int error;
2655	int waitok;
2656
2657	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2658	if (error)
2659		return (error);
2660	if (uap->abs_timeout != NULL) {
2661		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2662		if (error != 0)
2663			return (error);
2664		CP(ets32, ets, tv_sec);
2665		CP(ets32, ets, tv_nsec);
2666		abs_timeout = &ets;
2667	} else
2668		abs_timeout = NULL;
2669	waitok = !(fp->f_flag & O_NONBLOCK);
2670	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2671		uap->msg_prio, waitok, abs_timeout);
2672	fdrop(fp, td);
2673	return (error);
2674}
2675
2676int
2677freebsd32_kmq_timedreceive(struct thread *td,
2678    struct freebsd32_kmq_timedreceive_args *uap)
2679{
2680	struct mqueue *mq;
2681	struct file *fp;
2682	struct timespec32 ets32;
2683	struct timespec *abs_timeout, ets;
2684	int error, waitok;
2685
2686	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2687	if (error)
2688		return (error);
2689	if (uap->abs_timeout != NULL) {
2690		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2691		if (error != 0)
2692			return (error);
2693		CP(ets32, ets, tv_sec);
2694		CP(ets32, ets, tv_nsec);
2695		abs_timeout = &ets;
2696	} else
2697		abs_timeout = NULL;
2698	waitok = !(fp->f_flag & O_NONBLOCK);
2699	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2700		uap->msg_prio, waitok, abs_timeout);
2701	fdrop(fp, td);
2702	return (error);
2703}
2704
2705static struct syscall_helper_data mq32_syscalls[] = {
2706	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
2707	SYSCALL32_INIT_HELPER(freebsd32_kmq_setattr),
2708	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedsend),
2709	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedreceive),
2710	SYSCALL32_INIT_HELPER(kmq_notify),
2711	SYSCALL32_INIT_HELPER(kmq_unlink),
2712	SYSCALL_INIT_LAST
2713};
2714#endif
2715
2716static int
2717mqinit(void)
2718{
2719	int error;
2720
2721	error = syscall_helper_register(mq_syscalls);
2722	if (error != 0)
2723		return (error);
2724#ifdef COMPAT_FREEBSD32
2725	error = syscall32_helper_register(mq32_syscalls);
2726	if (error != 0)
2727		return (error);
2728#endif
2729	return (0);
2730}
2731
2732static int
2733mqunload(void)
2734{
2735
2736#ifdef COMPAT_FREEBSD32
2737	syscall32_helper_unregister(mq32_syscalls);
2738#endif
2739	syscall_helper_unregister(mq_syscalls);
2740	return (0);
2741}
2742
2743static int
2744mq_modload(struct module *module, int cmd, void *arg)
2745{
2746	int error = 0;
2747
2748	error = vfs_modevent(module, cmd, arg);
2749	if (error != 0)
2750		return (error);
2751
2752	switch (cmd) {
2753	case MOD_LOAD:
2754		error = mqinit();
2755		if (error != 0)
2756			mqunload();
2757		break;
2758	case MOD_UNLOAD:
2759		error = mqunload();
2760		break;
2761	default:
2762		break;
2763	}
2764	return (error);
2765}
2766
2767static moduledata_t mqueuefs_mod = {
2768	"mqueuefs",
2769	mq_modload,
2770	&mqueuefs_vfsconf
2771};
2772DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
2773MODULE_VERSION(mqueuefs, 1);
2774