1/*-
2 * Copyright (c) 2005 David Xu <davidxu@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28/*
29 * POSIX message queue implementation.
30 *
 * 1) A mqueue filesystem can be mounted.  Each message queue appears as
 *    a file in the mounted directory, and a user can change a queue's
 *    permissions and ownership, or remove the queue.  Manually creating
 *    a file in the directory causes a message queue with the same name
 *    and the default message queue attributes to be created in the
 *    kernel.  This method is not recommended, because the mq_open
 *    syscall lets the user specify different attributes.  The file
 *    system can also be mounted multiple times at different mount
 *    points; every mount shows the same contents.
39 *
40 * 2) Standard POSIX message queue API. The syscalls do not use vfs layer,
41 *    but directly operate on internal data structure, this allows user to
42 *    use the IPC facility without having to mount mqueue file system.
43 */
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD: releng/10.3/sys/kern/uipc_mqueue.c 325873 2017-11-15 22:45:13Z gordon $");
47
48#include "opt_capsicum.h"
49#include "opt_compat.h"
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/systm.h>
54#include <sys/limits.h>
55#include <sys/malloc.h>
56#include <sys/buf.h>
57#include <sys/capsicum.h>
58#include <sys/dirent.h>
59#include <sys/event.h>
60#include <sys/eventhandler.h>
61#include <sys/fcntl.h>
62#include <sys/file.h>
63#include <sys/filedesc.h>
64#include <sys/jail.h>
65#include <sys/lock.h>
66#include <sys/module.h>
67#include <sys/mount.h>
68#include <sys/mqueue.h>
69#include <sys/mutex.h>
70#include <sys/namei.h>
71#include <sys/posix4.h>
72#include <sys/poll.h>
73#include <sys/priv.h>
74#include <sys/proc.h>
75#include <sys/queue.h>
76#include <sys/sysproto.h>
77#include <sys/stat.h>
78#include <sys/syscall.h>
79#include <sys/syscallsubr.h>
80#include <sys/sysent.h>
81#include <sys/sx.h>
82#include <sys/sysctl.h>
83#include <sys/taskqueue.h>
84#include <sys/unistd.h>
85#include <sys/vnode.h>
86#include <machine/atomic.h>
87
88FEATURE(p1003_1b_mqueue, "POSIX P1003.1B message queues support");
89
/*
 * Size limits used by the mqfs name space.
 */

/* Longest node name; mn_name[] reserves one extra byte for the NUL. */
#define	MQFS_NAMELEN		NAME_MAX

/* MQFS_NAMELEN plus eight additional bytes. */
#define	MQFS_DELEN		(8 + MQFS_NAMELEN)
95
/*
 * Kinds of node that may appear in the mqfs tree.
 */
typedef enum {
	mqfstype_none = 0,	/* unset; mqfs_allocv() refuses such nodes */
	mqfstype_root,		/* root directory of the file system */
	mqfstype_dir,		/* ordinary directory */
	mqfstype_this,		/* "." entry of a directory */
	mqfstype_parent,	/* ".." entry of a directory */
	mqfstype_file,		/* regular file (VREG vnode) */
	mqfstype_symlink,	/* symbolic link (VLNK vnode) */
} mqfs_type_t;
106
107struct mqfs_node;
108
109/*
110 * mqfs_info: describes a mqfs instance
111 */
112struct mqfs_info {
113	struct sx		mi_lock;
114	struct mqfs_node	*mi_root;
115	struct unrhdr		*mi_unrhdr;
116};
117
118struct mqfs_vdata {
119	LIST_ENTRY(mqfs_vdata)	mv_link;
120	struct mqfs_node	*mv_node;
121	struct vnode		*mv_vnode;
122	struct task		mv_task;
123};
124
125/*
126 * mqfs_node: describes a node (file or directory) within a mqfs
127 */
128struct mqfs_node {
129	char			mn_name[MQFS_NAMELEN+1];
130	struct mqfs_info	*mn_info;
131	struct mqfs_node	*mn_parent;
132	LIST_HEAD(,mqfs_node)	mn_children;
133	LIST_ENTRY(mqfs_node)	mn_sibling;
134	LIST_HEAD(,mqfs_vdata)	mn_vnodes;
135	const void		*mn_pr_root;
136	int			mn_refcount;
137	mqfs_type_t		mn_type;
138	int			mn_deleted;
139	uint32_t		mn_fileno;
140	void			*mn_data;
141	struct timespec		mn_birth;
142	struct timespec		mn_ctime;
143	struct timespec		mn_atime;
144	struct timespec		mn_mtime;
145	uid_t			mn_uid;
146	gid_t			mn_gid;
147	int			mn_mode;
148};
149
/* vnode -> mqfs node, through the vnode's mqfs_vdata. */
#define	VTON(vp)							\
	(((struct mqfs_vdata *)((vp)->v_data))->mv_node)
/* vnode -> message queue stored in the node's mn_data. */
#define	VTOMQ(vp)	((struct mqueue *)(VTON(vp)->mn_data))
/* mount -> mqfs instance kept in mnt_data. */
#define	VFSTOMQFS(m)	((struct mqfs_info *)((m)->mnt_data))
/* file -> message queue; f_data is treated as the mqfs node. */
#define	FPTOMQ(fp)							\
	((struct mqueue *)(((struct mqfs_node *)(fp)->f_data)->mn_data))
155
156struct mqfs_osd {
157	struct task	mo_task;
158	const void	*mo_pr_root;
159};
160
161TAILQ_HEAD(msgq, mqueue_msg);
162
163struct mqueue;
164
165struct mqueue_notifier {
166	LIST_ENTRY(mqueue_notifier)	nt_link;
167	struct sigevent			nt_sigev;
168	ksiginfo_t			nt_ksi;
169	struct proc			*nt_proc;
170};
171
172struct mqueue {
173	struct mtx	mq_mutex;
174	int		mq_flags;
175	long		mq_maxmsg;
176	long		mq_msgsize;
177	long		mq_curmsgs;
178	long		mq_totalbytes;
179	struct msgq	mq_msgq;
180	int		mq_receivers;
181	int		mq_senders;
182	struct selinfo	mq_rsel;
183	struct selinfo	mq_wsel;
184	struct mqueue_notifier	*mq_notifier;
185};
186
187#define	MQ_RSEL		0x01
188#define	MQ_WSEL		0x02
189
190struct mqueue_msg {
191	TAILQ_ENTRY(mqueue_msg)	msg_link;
192	unsigned int	msg_prio;
193	unsigned int	msg_size;
194	/* following real data... */
195};
196
197static SYSCTL_NODE(_kern, OID_AUTO, mqueue, CTLFLAG_RW, 0,
198	"POSIX real time message queue");
199
200static int	default_maxmsg  = 10;
201static int	default_msgsize = 1024;
202
203static int	maxmsg = 100;
204SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsg, CTLFLAG_RW,
205    &maxmsg, 0, "Default maximum messages in queue");
206static int	maxmsgsize = 16384;
207SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmsgsize, CTLFLAG_RW,
208    &maxmsgsize, 0, "Default maximum message size");
209static int	maxmq = 100;
210SYSCTL_INT(_kern_mqueue, OID_AUTO, maxmq, CTLFLAG_RW,
211    &maxmq, 0, "maximum message queues");
212static int	curmq = 0;
213SYSCTL_INT(_kern_mqueue, OID_AUTO, curmq, CTLFLAG_RW,
214    &curmq, 0, "current message queue number");
215static int	unloadable = 0;
216static MALLOC_DEFINE(M_MQUEUEDATA, "mqdata", "mqueue data");
217
218static eventhandler_tag exit_tag;
219
220/* Only one instance per-system */
221static struct mqfs_info		mqfs_data;
222static uma_zone_t		mqnode_zone;
223static uma_zone_t		mqueue_zone;
224static uma_zone_t		mvdata_zone;
225static uma_zone_t		mqnoti_zone;
226static struct vop_vector	mqfs_vnodeops;
227static struct fileops		mqueueops;
228static unsigned			mqfs_osd_jail_slot;
229
/*
 * Forward declarations: directory tree construction and manipulation.
 */
#ifdef notyet
static struct mqfs_node	*mqfs_create_dir(struct mqfs_node *parent,
			    const char *name, int namelen,
			    struct ucred *cred, int mode);
static struct mqfs_node	*mqfs_create_link(struct mqfs_node *parent,
			    const char *name, int namelen,
			    struct ucred *cred, int mode);
#endif

static struct mqfs_node	*mqfs_create_file(struct mqfs_node *parent,
			    const char *name, int namelen,
			    struct ucred *cred, int mode);
static int	mqfs_destroy(struct mqfs_node *mn);
static void	mqfs_fileno_alloc(struct mqfs_info *mi,
		    struct mqfs_node *mn);
static void	mqfs_fileno_free(struct mqfs_info *mi,
		    struct mqfs_node *mn);
static int	mqfs_allocv(struct mount *mp, struct vnode **vpp,
		    struct mqfs_node *pn);
static int	mqfs_prison_create(void *obj, void *data);
static void	mqfs_prison_destructor(void *data);
static void	mqfs_prison_remove_task(void *context, int pending);
249
250/*
251 * Message queue construction and maniplation
252 */
253static struct mqueue	*mqueue_alloc(const struct mq_attr *attr);
254static void	mqueue_free(struct mqueue *mq);
255static int	mqueue_send(struct mqueue *mq, const char *msg_ptr,
256			size_t msg_len, unsigned msg_prio, int waitok,
257			const struct timespec *abs_timeout);
258static int	mqueue_receive(struct mqueue *mq, char *msg_ptr,
259			size_t msg_len, unsigned *msg_prio, int waitok,
260			const struct timespec *abs_timeout);
261static int	_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg,
262			int timo);
263static int	_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg,
264			int timo);
265static void	mqueue_send_notification(struct mqueue *mq);
266static void	mqueue_fdclose(struct thread *td, int fd, struct file *fp);
267static void	mq_proc_exit(void *arg, struct proc *p);
268
269/*
270 * kqueue filters
271 */
272static void	filt_mqdetach(struct knote *kn);
273static int	filt_mqread(struct knote *kn, long hint);
274static int	filt_mqwrite(struct knote *kn, long hint);
275
276struct filterops mq_rfiltops = {
277	.f_isfd = 1,
278	.f_detach = filt_mqdetach,
279	.f_event = filt_mqread,
280};
281struct filterops mq_wfiltops = {
282	.f_isfd = 1,
283	.f_detach = filt_mqdetach,
284	.f_event = filt_mqwrite,
285};
286
287/*
288 * Initialize fileno bitmap
289 */
290static void
291mqfs_fileno_init(struct mqfs_info *mi)
292{
293	struct unrhdr *up;
294
295	up = new_unrhdr(1, INT_MAX, NULL);
296	mi->mi_unrhdr = up;
297}
298
299/*
300 * Tear down fileno bitmap
301 */
302static void
303mqfs_fileno_uninit(struct mqfs_info *mi)
304{
305	struct unrhdr *up;
306
307	up = mi->mi_unrhdr;
308	mi->mi_unrhdr = NULL;
309	delete_unrhdr(up);
310}
311
312/*
313 * Allocate a file number
314 */
315static void
316mqfs_fileno_alloc(struct mqfs_info *mi, struct mqfs_node *mn)
317{
318	/* make sure our parent has a file number */
319	if (mn->mn_parent && !mn->mn_parent->mn_fileno)
320		mqfs_fileno_alloc(mi, mn->mn_parent);
321
322	switch (mn->mn_type) {
323	case mqfstype_root:
324	case mqfstype_dir:
325	case mqfstype_file:
326	case mqfstype_symlink:
327		mn->mn_fileno = alloc_unr(mi->mi_unrhdr);
328		break;
329	case mqfstype_this:
330		KASSERT(mn->mn_parent != NULL,
331		    ("mqfstype_this node has no parent"));
332		mn->mn_fileno = mn->mn_parent->mn_fileno;
333		break;
334	case mqfstype_parent:
335		KASSERT(mn->mn_parent != NULL,
336		    ("mqfstype_parent node has no parent"));
337		if (mn->mn_parent == mi->mi_root) {
338			mn->mn_fileno = mn->mn_parent->mn_fileno;
339			break;
340		}
341		KASSERT(mn->mn_parent->mn_parent != NULL,
342		    ("mqfstype_parent node has no grandparent"));
343		mn->mn_fileno = mn->mn_parent->mn_parent->mn_fileno;
344		break;
345	default:
346		KASSERT(0,
347		    ("mqfs_fileno_alloc() called for unknown type node: %d",
348			mn->mn_type));
349		break;
350	}
351}
352
353/*
354 * Release a file number
355 */
356static void
357mqfs_fileno_free(struct mqfs_info *mi, struct mqfs_node *mn)
358{
359	switch (mn->mn_type) {
360	case mqfstype_root:
361	case mqfstype_dir:
362	case mqfstype_file:
363	case mqfstype_symlink:
364		free_unr(mi->mi_unrhdr, mn->mn_fileno);
365		break;
366	case mqfstype_this:
367	case mqfstype_parent:
368		/* ignore these, as they don't "own" their file number */
369		break;
370	default:
371		KASSERT(0,
372		    ("mqfs_fileno_free() called for unknown type node: %d",
373			mn->mn_type));
374		break;
375	}
376}
377
378static __inline struct mqfs_node *
379mqnode_alloc(void)
380{
381	return uma_zalloc(mqnode_zone, M_WAITOK | M_ZERO);
382}
383
384static __inline void
385mqnode_free(struct mqfs_node *node)
386{
387	uma_zfree(mqnode_zone, node);
388}
389
390static __inline void
391mqnode_addref(struct mqfs_node *node)
392{
393	atomic_fetchadd_int(&node->mn_refcount, 1);
394}
395
396static __inline void
397mqnode_release(struct mqfs_node *node)
398{
399	struct mqfs_info *mqfs;
400	int old, exp;
401
402	mqfs = node->mn_info;
403	old = atomic_fetchadd_int(&node->mn_refcount, -1);
404	if (node->mn_type == mqfstype_dir ||
405	    node->mn_type == mqfstype_root)
406		exp = 3; /* include . and .. */
407	else
408		exp = 1;
409	if (old == exp) {
410		int locked = sx_xlocked(&mqfs->mi_lock);
411		if (!locked)
412			sx_xlock(&mqfs->mi_lock);
413		mqfs_destroy(node);
414		if (!locked)
415			sx_xunlock(&mqfs->mi_lock);
416	}
417}
418
419/*
420 * Add a node to a directory
421 */
422static int
423mqfs_add_node(struct mqfs_node *parent, struct mqfs_node *node)
424{
425	KASSERT(parent != NULL, ("%s(): parent is NULL", __func__));
426	KASSERT(parent->mn_info != NULL,
427	    ("%s(): parent has no mn_info", __func__));
428	KASSERT(parent->mn_type == mqfstype_dir ||
429	    parent->mn_type == mqfstype_root,
430	    ("%s(): parent is not a directory", __func__));
431
432	node->mn_info = parent->mn_info;
433	node->mn_parent = parent;
434	LIST_INIT(&node->mn_children);
435	LIST_INIT(&node->mn_vnodes);
436	LIST_INSERT_HEAD(&parent->mn_children, node, mn_sibling);
437	mqnode_addref(parent);
438	return (0);
439}
440
441static struct mqfs_node *
442mqfs_create_node(const char *name, int namelen, struct ucred *cred, int mode,
443	int nodetype)
444{
445	struct mqfs_node *node;
446
447	node = mqnode_alloc();
448	strncpy(node->mn_name, name, namelen);
449	node->mn_pr_root = cred->cr_prison->pr_root;
450	node->mn_type = nodetype;
451	node->mn_refcount = 1;
452	vfs_timestamp(&node->mn_birth);
453	node->mn_ctime = node->mn_atime = node->mn_mtime
454		= node->mn_birth;
455	node->mn_uid = cred->cr_uid;
456	node->mn_gid = cred->cr_gid;
457	node->mn_mode = mode;
458	return (node);
459}
460
461/*
462 * Create a file
463 */
464static struct mqfs_node *
465mqfs_create_file(struct mqfs_node *parent, const char *name, int namelen,
466	struct ucred *cred, int mode)
467{
468	struct mqfs_node *node;
469
470	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_file);
471	if (mqfs_add_node(parent, node) != 0) {
472		mqnode_free(node);
473		return (NULL);
474	}
475	return (node);
476}
477
478/*
479 * Add . and .. to a directory
480 */
481static int
482mqfs_fixup_dir(struct mqfs_node *parent)
483{
484	struct mqfs_node *dir;
485
486	dir = mqnode_alloc();
487	dir->mn_name[0] = '.';
488	dir->mn_type = mqfstype_this;
489	dir->mn_refcount = 1;
490	if (mqfs_add_node(parent, dir) != 0) {
491		mqnode_free(dir);
492		return (-1);
493	}
494
495	dir = mqnode_alloc();
496	dir->mn_name[0] = dir->mn_name[1] = '.';
497	dir->mn_type = mqfstype_parent;
498	dir->mn_refcount = 1;
499
500	if (mqfs_add_node(parent, dir) != 0) {
501		mqnode_free(dir);
502		return (-1);
503	}
504
505	return (0);
506}
507
508#ifdef notyet
509
510/*
511 * Create a directory
512 */
513static struct mqfs_node *
514mqfs_create_dir(struct mqfs_node *parent, const char *name, int namelen,
515	struct ucred *cred, int mode)
516{
517	struct mqfs_node *node;
518
519	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_dir);
520	if (mqfs_add_node(parent, node) != 0) {
521		mqnode_free(node);
522		return (NULL);
523	}
524
525	if (mqfs_fixup_dir(node) != 0) {
526		mqfs_destroy(node);
527		return (NULL);
528	}
529	return (node);
530}
531
532/*
533 * Create a symlink
534 */
535static struct mqfs_node *
536mqfs_create_link(struct mqfs_node *parent, const char *name, int namelen,
537	struct ucred *cred, int mode)
538{
539	struct mqfs_node *node;
540
541	node = mqfs_create_node(name, namelen, cred, mode, mqfstype_symlink);
542	if (mqfs_add_node(parent, node) != 0) {
543		mqnode_free(node);
544		return (NULL);
545	}
546	return (node);
547}
548
549#endif
550
551/*
552 * Destroy a node or a tree of nodes
553 */
554static int
555mqfs_destroy(struct mqfs_node *node)
556{
557	struct mqfs_node *parent;
558
559	KASSERT(node != NULL,
560	    ("%s(): node is NULL", __func__));
561	KASSERT(node->mn_info != NULL,
562	    ("%s(): node has no mn_info", __func__));
563
564	/* destroy children */
565	if (node->mn_type == mqfstype_dir || node->mn_type == mqfstype_root)
566		while (! LIST_EMPTY(&node->mn_children))
567			mqfs_destroy(LIST_FIRST(&node->mn_children));
568
569	/* unlink from parent */
570	if ((parent = node->mn_parent) != NULL) {
571		KASSERT(parent->mn_info == node->mn_info,
572		    ("%s(): parent has different mn_info", __func__));
573		LIST_REMOVE(node, mn_sibling);
574	}
575
576	if (node->mn_fileno != 0)
577		mqfs_fileno_free(node->mn_info, node);
578	if (node->mn_data != NULL)
579		mqueue_free(node->mn_data);
580	mqnode_free(node);
581	return (0);
582}
583
584/*
585 * Mount a mqfs instance
586 */
587static int
588mqfs_mount(struct mount *mp)
589{
590	struct statfs *sbp;
591
592	if (mp->mnt_flag & MNT_UPDATE)
593		return (EOPNOTSUPP);
594
595	mp->mnt_data = &mqfs_data;
596	MNT_ILOCK(mp);
597	mp->mnt_flag |= MNT_LOCAL;
598	MNT_IUNLOCK(mp);
599	vfs_getnewfsid(mp);
600
601	sbp = &mp->mnt_stat;
602	vfs_mountedfrom(mp, "mqueue");
603	sbp->f_bsize = PAGE_SIZE;
604	sbp->f_iosize = PAGE_SIZE;
605	sbp->f_blocks = 1;
606	sbp->f_bfree = 0;
607	sbp->f_bavail = 0;
608	sbp->f_files = 1;
609	sbp->f_ffree = 0;
610	return (0);
611}
612
613/*
614 * Unmount a mqfs instance
615 */
616static int
617mqfs_unmount(struct mount *mp, int mntflags)
618{
619	int error;
620
621	error = vflush(mp, 0, (mntflags & MNT_FORCE) ?  FORCECLOSE : 0,
622	    curthread);
623	return (error);
624}
625
626/*
627 * Return a root vnode
628 */
629static int
630mqfs_root(struct mount *mp, int flags, struct vnode **vpp)
631{
632	struct mqfs_info *mqfs;
633	int ret;
634
635	mqfs = VFSTOMQFS(mp);
636	ret = mqfs_allocv(mp, vpp, mqfs->mi_root);
637	return (ret);
638}
639
/*
 * Return file system statistics.  Nothing is refreshed here; sbp is
 * left exactly as the caller passed it in.
 */
static int
mqfs_statfs(struct mount *mp, struct statfs *sbp)
{

	/* XXX update statistics */
	return (0);
}
649
650/*
651 * Initialize a mqfs instance
652 */
653static int
654mqfs_init(struct vfsconf *vfc)
655{
656	struct mqfs_node *root;
657	struct mqfs_info *mi;
658	struct prison *pr;
659	osd_method_t methods[PR_MAXMETHOD] = {
660	    [PR_METHOD_CREATE] = mqfs_prison_create,
661	};
662
663	mqnode_zone = uma_zcreate("mqnode", sizeof(struct mqfs_node),
664		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
665	mqueue_zone = uma_zcreate("mqueue", sizeof(struct mqueue),
666		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
667	mvdata_zone = uma_zcreate("mvdata",
668		sizeof(struct mqfs_vdata), NULL, NULL, NULL,
669		NULL, UMA_ALIGN_PTR, 0);
670	mqnoti_zone = uma_zcreate("mqnotifier", sizeof(struct mqueue_notifier),
671		NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
672	mi = &mqfs_data;
673	sx_init(&mi->mi_lock, "mqfs lock");
674	/* set up the root diretory */
675	root = mqfs_create_node("/", 1, curthread->td_ucred, 01777,
676		mqfstype_root);
677	root->mn_info = mi;
678	LIST_INIT(&root->mn_children);
679	LIST_INIT(&root->mn_vnodes);
680	mi->mi_root = root;
681	mqfs_fileno_init(mi);
682	mqfs_fileno_alloc(mi, root);
683	mqfs_fixup_dir(root);
684	exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL,
685	    EVENTHANDLER_PRI_ANY);
686	mq_fdclose = mqueue_fdclose;
687	p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING);
688
689	/* Note current jails. */
690	mqfs_osd_jail_slot = osd_jail_register(mqfs_prison_destructor, methods);
691	sx_slock(&allprison_lock);
692	TAILQ_FOREACH(pr, &allprison, pr_list)
693		(void)mqfs_prison_create(pr, NULL);
694	sx_sunlock(&allprison_lock);
695	return (0);
696}
697
698/*
699 * Destroy a mqfs instance
700 */
701static int
702mqfs_uninit(struct vfsconf *vfc)
703{
704	unsigned slot;
705	struct mqfs_info *mi;
706
707	if (!unloadable)
708		return (EOPNOTSUPP);
709	slot = mqfs_osd_jail_slot;
710	mqfs_osd_jail_slot = 0;
711	osd_jail_deregister(slot);
712	EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
713	mi = &mqfs_data;
714	mqfs_destroy(mi->mi_root);
715	mi->mi_root = NULL;
716	mqfs_fileno_uninit(mi);
717	sx_destroy(&mi->mi_lock);
718	uma_zdestroy(mqnode_zone);
719	uma_zdestroy(mqueue_zone);
720	uma_zdestroy(mvdata_zone);
721	uma_zdestroy(mqnoti_zone);
722	return (0);
723}
724
725/*
726 * task routine
727 */
728static void
729do_recycle(void *context, int pending __unused)
730{
731	struct vnode *vp = (struct vnode *)context;
732
733	vrecycle(vp);
734	vdrop(vp);
735}
736
737/*
738 * Allocate a vnode
739 */
740static int
741mqfs_allocv(struct mount *mp, struct vnode **vpp, struct mqfs_node *pn)
742{
743	struct mqfs_vdata *vd;
744	struct mqfs_info  *mqfs;
745	struct vnode *newvpp;
746	int error;
747
748	mqfs = pn->mn_info;
749	*vpp = NULL;
750	sx_xlock(&mqfs->mi_lock);
751	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
752		if (vd->mv_vnode->v_mount == mp) {
753			vhold(vd->mv_vnode);
754			break;
755		}
756	}
757
758	if (vd != NULL) {
759found:
760		*vpp = vd->mv_vnode;
761		sx_xunlock(&mqfs->mi_lock);
762		error = vget(*vpp, LK_RETRY | LK_EXCLUSIVE, curthread);
763		vdrop(*vpp);
764		return (error);
765	}
766	sx_xunlock(&mqfs->mi_lock);
767
768	error = getnewvnode("mqueue", mp, &mqfs_vnodeops, &newvpp);
769	if (error)
770		return (error);
771	vn_lock(newvpp, LK_EXCLUSIVE | LK_RETRY);
772	error = insmntque(newvpp, mp);
773	if (error != 0)
774		return (error);
775
776	sx_xlock(&mqfs->mi_lock);
777	/*
778	 * Check if it has already been allocated
779	 * while we were blocked.
780	 */
781	LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
782		if (vd->mv_vnode->v_mount == mp) {
783			vhold(vd->mv_vnode);
784			sx_xunlock(&mqfs->mi_lock);
785
786			vgone(newvpp);
787			vput(newvpp);
788			goto found;
789		}
790	}
791
792	*vpp = newvpp;
793
794	vd = uma_zalloc(mvdata_zone, M_WAITOK);
795	(*vpp)->v_data = vd;
796	vd->mv_vnode = *vpp;
797	vd->mv_node = pn;
798	TASK_INIT(&vd->mv_task, 0, do_recycle, *vpp);
799	LIST_INSERT_HEAD(&pn->mn_vnodes, vd, mv_link);
800	mqnode_addref(pn);
801	switch (pn->mn_type) {
802	case mqfstype_root:
803		(*vpp)->v_vflag = VV_ROOT;
804		/* fall through */
805	case mqfstype_dir:
806	case mqfstype_this:
807	case mqfstype_parent:
808		(*vpp)->v_type = VDIR;
809		break;
810	case mqfstype_file:
811		(*vpp)->v_type = VREG;
812		break;
813	case mqfstype_symlink:
814		(*vpp)->v_type = VLNK;
815		break;
816	case mqfstype_none:
817		KASSERT(0, ("mqfs_allocf called for null node\n"));
818	default:
819		panic("%s has unexpected type: %d", pn->mn_name, pn->mn_type);
820	}
821	sx_xunlock(&mqfs->mi_lock);
822	return (0);
823}
824
825/*
826 * Search a directory entry
827 */
828static struct mqfs_node *
829mqfs_search(struct mqfs_node *pd, const char *name, int len, struct ucred *cred)
830{
831	struct mqfs_node *pn;
832	const void *pr_root;
833
834	sx_assert(&pd->mn_info->mi_lock, SX_LOCKED);
835	pr_root = cred->cr_prison->pr_root;
836	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
837		/* Only match names within the same prison root directory */
838		if ((pn->mn_pr_root == NULL || pn->mn_pr_root == pr_root) &&
839		    strncmp(pn->mn_name, name, len) == 0 &&
840		    pn->mn_name[len] == '\0')
841			return (pn);
842	}
843	return (NULL);
844}
845
846/*
847 * Look up a file or directory.
848 */
849static int
850mqfs_lookupx(struct vop_cachedlookup_args *ap)
851{
852	struct componentname *cnp;
853	struct vnode *dvp, **vpp;
854	struct mqfs_node *pd;
855	struct mqfs_node *pn;
856	struct mqfs_info *mqfs;
857	int nameiop, flags, error, namelen;
858	char *pname;
859	struct thread *td;
860
861	cnp = ap->a_cnp;
862	vpp = ap->a_vpp;
863	dvp = ap->a_dvp;
864	pname = cnp->cn_nameptr;
865	namelen = cnp->cn_namelen;
866	td = cnp->cn_thread;
867	flags = cnp->cn_flags;
868	nameiop = cnp->cn_nameiop;
869	pd = VTON(dvp);
870	pn = NULL;
871	mqfs = pd->mn_info;
872	*vpp = NULLVP;
873
874	if (dvp->v_type != VDIR)
875		return (ENOTDIR);
876
877	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
878	if (error)
879		return (error);
880
881	/* shortcut: check if the name is too long */
882	if (cnp->cn_namelen >= MQFS_NAMELEN)
883		return (ENOENT);
884
885	/* self */
886	if (namelen == 1 && pname[0] == '.') {
887		if ((flags & ISLASTCN) && nameiop != LOOKUP)
888			return (EINVAL);
889		pn = pd;
890		*vpp = dvp;
891		VREF(dvp);
892		return (0);
893	}
894
895	/* parent */
896	if (cnp->cn_flags & ISDOTDOT) {
897		if (dvp->v_vflag & VV_ROOT)
898			return (EIO);
899		if ((flags & ISLASTCN) && nameiop != LOOKUP)
900			return (EINVAL);
901		VOP_UNLOCK(dvp, 0);
902		KASSERT(pd->mn_parent, ("non-root directory has no parent"));
903		pn = pd->mn_parent;
904		error = mqfs_allocv(dvp->v_mount, vpp, pn);
905		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
906		return (error);
907	}
908
909	/* named node */
910	sx_xlock(&mqfs->mi_lock);
911	pn = mqfs_search(pd, pname, namelen, cnp->cn_cred);
912	if (pn != NULL)
913		mqnode_addref(pn);
914	sx_xunlock(&mqfs->mi_lock);
915
916	/* found */
917	if (pn != NULL) {
918		/* DELETE */
919		if (nameiop == DELETE && (flags & ISLASTCN)) {
920			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
921			if (error) {
922				mqnode_release(pn);
923				return (error);
924			}
925			if (*vpp == dvp) {
926				VREF(dvp);
927				*vpp = dvp;
928				mqnode_release(pn);
929				return (0);
930			}
931		}
932
933		/* allocate vnode */
934		error = mqfs_allocv(dvp->v_mount, vpp, pn);
935		mqnode_release(pn);
936		if (error == 0 && cnp->cn_flags & MAKEENTRY)
937			cache_enter(dvp, *vpp, cnp);
938		return (error);
939	}
940
941	/* not found */
942
943	/* will create a new entry in the directory ? */
944	if ((nameiop == CREATE || nameiop == RENAME) && (flags & LOCKPARENT)
945	    && (flags & ISLASTCN)) {
946		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
947		if (error)
948			return (error);
949		cnp->cn_flags |= SAVENAME;
950		return (EJUSTRETURN);
951	}
952	return (ENOENT);
953}
954
955#if 0
956struct vop_lookup_args {
957	struct vop_generic_args a_gen;
958	struct vnode *a_dvp;
959	struct vnode **a_vpp;
960	struct componentname *a_cnp;
961};
962#endif
963
/*
 * vnode lookup operation; a thin wrapper around mqfs_lookupx().
 */
static int
mqfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (mqfs_lookupx(ap));
}
975
976#if 0
977struct vop_create_args {
978	struct vnode *a_dvp;
979	struct vnode **a_vpp;
980	struct componentname *a_cnp;
981	struct vattr *a_vap;
982};
983#endif
984
985/*
986 * vnode creation operation
987 */
988static int
989mqfs_create(struct vop_create_args *ap)
990{
991	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
992	struct componentname *cnp = ap->a_cnp;
993	struct mqfs_node *pd;
994	struct mqfs_node *pn;
995	struct mqueue *mq;
996	int error;
997
998	pd = VTON(ap->a_dvp);
999	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1000		return (ENOTDIR);
1001	mq = mqueue_alloc(NULL);
1002	if (mq == NULL)
1003		return (EAGAIN);
1004	sx_xlock(&mqfs->mi_lock);
1005	if ((cnp->cn_flags & HASBUF) == 0)
1006		panic("%s: no name", __func__);
1007	pn = mqfs_create_file(pd, cnp->cn_nameptr, cnp->cn_namelen,
1008		cnp->cn_cred, ap->a_vap->va_mode);
1009	if (pn == NULL) {
1010		sx_xunlock(&mqfs->mi_lock);
1011		error = ENOSPC;
1012	} else {
1013		mqnode_addref(pn);
1014		sx_xunlock(&mqfs->mi_lock);
1015		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1016		mqnode_release(pn);
1017		if (error)
1018			mqfs_destroy(pn);
1019		else
1020			pn->mn_data = mq;
1021	}
1022	if (error)
1023		mqueue_free(mq);
1024	return (error);
1025}
1026
1027/*
1028 * Remove an entry
1029 */
1030static
1031int do_unlink(struct mqfs_node *pn, struct ucred *ucred)
1032{
1033	struct mqfs_node *parent;
1034	struct mqfs_vdata *vd;
1035	int error = 0;
1036
1037	sx_assert(&pn->mn_info->mi_lock, SX_LOCKED);
1038
1039	if (ucred->cr_uid != pn->mn_uid &&
1040	    (error = priv_check_cred(ucred, PRIV_MQ_ADMIN, 0)) != 0)
1041		error = EACCES;
1042	else if (!pn->mn_deleted) {
1043		parent = pn->mn_parent;
1044		pn->mn_parent = NULL;
1045		pn->mn_deleted = 1;
1046		LIST_REMOVE(pn, mn_sibling);
1047		LIST_FOREACH(vd, &pn->mn_vnodes, mv_link) {
1048			cache_purge(vd->mv_vnode);
1049			vhold(vd->mv_vnode);
1050			taskqueue_enqueue(taskqueue_thread, &vd->mv_task);
1051		}
1052		mqnode_release(pn);
1053		mqnode_release(parent);
1054	} else
1055		error = ENOENT;
1056	return (error);
1057}
1058
1059#if 0
1060struct vop_remove_args {
1061	struct vnode *a_dvp;
1062	struct vnode *a_vp;
1063	struct componentname *a_cnp;
1064};
1065#endif
1066
1067/*
1068 * vnode removal operation
1069 */
1070static int
1071mqfs_remove(struct vop_remove_args *ap)
1072{
1073	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1074	struct mqfs_node *pn;
1075	int error;
1076
1077	if (ap->a_vp->v_type == VDIR)
1078                return (EPERM);
1079	pn = VTON(ap->a_vp);
1080	sx_xlock(&mqfs->mi_lock);
1081	error = do_unlink(pn, ap->a_cnp->cn_cred);
1082	sx_xunlock(&mqfs->mi_lock);
1083	return (error);
1084}
1085
1086#if 0
1087struct vop_inactive_args {
1088	struct vnode *a_vp;
1089	struct thread *a_td;
1090};
1091#endif
1092
1093static int
1094mqfs_inactive(struct vop_inactive_args *ap)
1095{
1096	struct mqfs_node *pn = VTON(ap->a_vp);
1097
1098	if (pn->mn_deleted)
1099		vrecycle(ap->a_vp);
1100	return (0);
1101}
1102
1103#if 0
1104struct vop_reclaim_args {
1105	struct vop_generic_args a_gen;
1106	struct vnode *a_vp;
1107	struct thread *a_td;
1108};
1109#endif
1110
1111static int
1112mqfs_reclaim(struct vop_reclaim_args *ap)
1113{
1114	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_vp->v_mount);
1115	struct vnode *vp = ap->a_vp;
1116	struct mqfs_node *pn;
1117	struct mqfs_vdata *vd;
1118
1119	vd = vp->v_data;
1120	pn = vd->mv_node;
1121	sx_xlock(&mqfs->mi_lock);
1122	vp->v_data = NULL;
1123	LIST_REMOVE(vd, mv_link);
1124	uma_zfree(mvdata_zone, vd);
1125	mqnode_release(pn);
1126	sx_xunlock(&mqfs->mi_lock);
1127	return (0);
1128}
1129
1130#if 0
1131struct vop_open_args {
1132	struct vop_generic_args a_gen;
1133	struct vnode *a_vp;
1134	int a_mode;
1135	struct ucred *a_cred;
1136	struct thread *a_td;
1137	struct file *a_fp;
1138};
1139#endif
1140
/*
 * vnode open operation: nothing to do, always succeeds.
 */
static int
mqfs_open(struct vop_open_args *ap)
{

	return (0);
}
1146
1147#if 0
1148struct vop_close_args {
1149	struct vop_generic_args a_gen;
1150	struct vnode *a_vp;
1151	int a_fflag;
1152	struct ucred *a_cred;
1153	struct thread *a_td;
1154};
1155#endif
1156
/*
 * vnode close operation: nothing to do, always succeeds.
 */
static int
mqfs_close(struct vop_close_args *ap)
{

	return (0);
}
1162
1163#if 0
1164struct vop_access_args {
1165	struct vop_generic_args a_gen;
1166	struct vnode *a_vp;
1167	accmode_t a_accmode;
1168	struct ucred *a_cred;
1169	struct thread *a_td;
1170};
1171#endif
1172
1173/*
1174 * Verify permissions
1175 */
1176static int
1177mqfs_access(struct vop_access_args *ap)
1178{
1179	struct vnode *vp = ap->a_vp;
1180	struct vattr vattr;
1181	int error;
1182
1183	error = VOP_GETATTR(vp, &vattr, ap->a_cred);
1184	if (error)
1185		return (error);
1186	error = vaccess(vp->v_type, vattr.va_mode, vattr.va_uid,
1187	    vattr.va_gid, ap->a_accmode, ap->a_cred, NULL);
1188	return (error);
1189}
1190
1191#if 0
1192struct vop_getattr_args {
1193	struct vop_generic_args a_gen;
1194	struct vnode *a_vp;
1195	struct vattr *a_vap;
1196	struct ucred *a_cred;
1197};
1198#endif
1199
1200/*
1201 * Get file attributes
1202 */
1203static int
1204mqfs_getattr(struct vop_getattr_args *ap)
1205{
1206	struct vnode *vp = ap->a_vp;
1207	struct mqfs_node *pn = VTON(vp);
1208	struct vattr *vap = ap->a_vap;
1209	int error = 0;
1210
1211	vap->va_type = vp->v_type;
1212	vap->va_mode = pn->mn_mode;
1213	vap->va_nlink = 1;
1214	vap->va_uid = pn->mn_uid;
1215	vap->va_gid = pn->mn_gid;
1216	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
1217	vap->va_fileid = pn->mn_fileno;
1218	vap->va_size = 0;
1219	vap->va_blocksize = PAGE_SIZE;
1220	vap->va_bytes = vap->va_size = 0;
1221	vap->va_atime = pn->mn_atime;
1222	vap->va_mtime = pn->mn_mtime;
1223	vap->va_ctime = pn->mn_ctime;
1224	vap->va_birthtime = pn->mn_birth;
1225	vap->va_gen = 0;
1226	vap->va_flags = 0;
1227	vap->va_rdev = NODEV;
1228	vap->va_bytes = 0;
1229	vap->va_filerev = 0;
1230	return (error);
1231}
1232
1233#if 0
1234struct vop_setattr_args {
1235	struct vop_generic_args a_gen;
1236	struct vnode *a_vp;
1237	struct vattr *a_vap;
1238	struct ucred *a_cred;
1239};
1240#endif
/*
 * Set attributes
 *
 * Only owner, group, mode, access time and modification time can be
 * changed.  A request that tries to set any other attribute is rejected
 * with EINVAL.  When at least one attribute is changed the node's change
 * time is refreshed.  Returns 0 on success or the error from the failing
 * access/privilege check.
 */
static int
mqfs_setattr(struct vop_setattr_args *ap)
{
	struct mqfs_node *pn;
	struct vattr *vap;
	struct vnode *vp;
	struct thread *td;
	int c, error;
	uid_t uid;
	gid_t gid;

	td = curthread;
	vap = ap->a_vap;
	vp = ap->a_vp;
	/* Refuse attempts to set attributes this file system cannot change. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	pn = VTON(vp);

	/* c records whether anything was modified. */
	error = c = 0;
	/* An unset (VNOVAL) uid/gid means "keep the current value". */
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = pn->mn_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = pn->mn_gid;
	else
		gid = vap->va_gid;

	if (uid != pn->mn_uid || gid != pn->mn_gid) {
		/*
		 * To modify the ownership of a file, must possess VADMIN
		 * for that file.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)))
			return (error);

		/*
		 * XXXRW: Why is there a privilege check here: shouldn't the
		 * check in VOP_ACCESS() be enough?  Also, are the group bits
		 * below definitely right?
		 */
		if (((ap->a_cred->cr_uid != pn->mn_uid) || uid != pn->mn_uid ||
		    (gid != pn->mn_gid && !groupmember(gid, ap->a_cred))) &&
		    (error = priv_check(td, PRIV_MQ_ADMIN)) != 0)
			return (error);
		pn->mn_uid = uid;
		pn->mn_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		/*
		 * NOTE(review): pn->mn_uid may already have been replaced by
		 * the ownership change above, so this compares the caller
		 * against the new owner in that case -- confirm intended.
		 */
		if ((ap->a_cred->cr_uid != pn->mn_uid) &&
		    (error = priv_check(td, PRIV_MQ_ADMIN)))
			return (error);
		pn->mn_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		/* See the comment in ufs_vnops::ufs_setattr(). */
		/*
		 * VADMIN is sufficient; without it the request is allowed
		 * only when VA_UTIMES_NULL is set and VWRITE is granted.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td)) &&
		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
		    (error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td))))
			return (error);
		if (vap->va_atime.tv_sec != VNOVAL) {
			pn->mn_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			pn->mn_mtime = vap->va_mtime;
		}
		c = 1;
	}
	if (c) {
		vfs_timestamp(&pn->mn_ctime);
	}
	return (0);
}
1331
1332#if 0
1333struct vop_read_args {
1334	struct vop_generic_args a_gen;
1335	struct vnode *a_vp;
1336	struct uio *a_uio;
1337	int a_ioflag;
1338	struct ucred *a_cred;
1339};
1340#endif
1341
1342/*
1343 * Read from a file
1344 */
1345static int
1346mqfs_read(struct vop_read_args *ap)
1347{
1348	char buf[80];
1349	struct vnode *vp = ap->a_vp;
1350	struct uio *uio = ap->a_uio;
1351	struct mqfs_node *pn;
1352	struct mqueue *mq;
1353	int len, error;
1354
1355	if (vp->v_type != VREG)
1356		return (EINVAL);
1357
1358	pn = VTON(vp);
1359	mq = VTOMQ(vp);
1360	snprintf(buf, sizeof(buf),
1361		"QSIZE:%-10ld MAXMSG:%-10ld CURMSG:%-10ld MSGSIZE:%-10ld\n",
1362		mq->mq_totalbytes,
1363		mq->mq_maxmsg,
1364		mq->mq_curmsgs,
1365		mq->mq_msgsize);
1366	buf[sizeof(buf)-1] = '\0';
1367	len = strlen(buf);
1368	error = uiomove_frombuf(buf, len, uio);
1369	return (error);
1370}
1371
1372#if 0
1373struct vop_readdir_args {
1374	struct vop_generic_args a_gen;
1375	struct vnode *a_vp;
1376	struct uio *a_uio;
1377	struct ucred *a_cred;
1378	int *a_eofflag;
1379	int *a_ncookies;
1380	u_long **a_cookies;
1381};
1382#endif
1383
1384/*
1385 * Return directory entries.
1386 */
1387static int
1388mqfs_readdir(struct vop_readdir_args *ap)
1389{
1390	struct vnode *vp;
1391	struct mqfs_info *mi;
1392	struct mqfs_node *pd;
1393	struct mqfs_node *pn;
1394	struct dirent entry;
1395	struct uio *uio;
1396	const void *pr_root;
1397	int *tmp_ncookies = NULL;
1398	off_t offset;
1399	int error, i;
1400
1401	vp = ap->a_vp;
1402	mi = VFSTOMQFS(vp->v_mount);
1403	pd = VTON(vp);
1404	uio = ap->a_uio;
1405
1406	if (vp->v_type != VDIR)
1407		return (ENOTDIR);
1408
1409	if (uio->uio_offset < 0)
1410		return (EINVAL);
1411
1412	if (ap->a_ncookies != NULL) {
1413		tmp_ncookies = ap->a_ncookies;
1414		*ap->a_ncookies = 0;
1415		ap->a_ncookies = NULL;
1416        }
1417
1418	error = 0;
1419	offset = 0;
1420
1421	pr_root = ap->a_cred->cr_prison->pr_root;
1422	sx_xlock(&mi->mi_lock);
1423
1424	LIST_FOREACH(pn, &pd->mn_children, mn_sibling) {
1425		entry.d_reclen = sizeof(entry);
1426
1427		/*
1428		 * Only show names within the same prison root directory
1429		 * (or not associated with a prison, e.g. "." and "..").
1430		 */
1431		if (pn->mn_pr_root != NULL && pn->mn_pr_root != pr_root)
1432			continue;
1433		if (!pn->mn_fileno)
1434			mqfs_fileno_alloc(mi, pn);
1435		entry.d_fileno = pn->mn_fileno;
1436		for (i = 0; i < MQFS_NAMELEN - 1 && pn->mn_name[i] != '\0'; ++i)
1437			entry.d_name[i] = pn->mn_name[i];
1438		entry.d_name[i] = 0;
1439		entry.d_namlen = i;
1440		switch (pn->mn_type) {
1441		case mqfstype_root:
1442		case mqfstype_dir:
1443		case mqfstype_this:
1444		case mqfstype_parent:
1445			entry.d_type = DT_DIR;
1446			break;
1447		case mqfstype_file:
1448			entry.d_type = DT_REG;
1449			break;
1450		case mqfstype_symlink:
1451			entry.d_type = DT_LNK;
1452			break;
1453		default:
1454			panic("%s has unexpected node type: %d", pn->mn_name,
1455				pn->mn_type);
1456		}
1457		if (entry.d_reclen > uio->uio_resid)
1458                        break;
1459		if (offset >= uio->uio_offset) {
1460			error = vfs_read_dirent(ap, &entry, offset);
1461                        if (error)
1462                                break;
1463                }
1464                offset += entry.d_reclen;
1465	}
1466	sx_xunlock(&mi->mi_lock);
1467
1468	uio->uio_offset = offset;
1469
1470	if (tmp_ncookies != NULL)
1471		ap->a_ncookies = tmp_ncookies;
1472
1473	return (error);
1474}
1475
1476#ifdef notyet
1477
1478#if 0
1479struct vop_mkdir_args {
1480	struct vnode *a_dvp;
	struct vnode **a_vpp;
	struct componentname *a_cnp;
1483	struct vattr *a_vap;
1484};
1485#endif
1486
1487/*
1488 * Create a directory.
1489 */
1490static int
1491mqfs_mkdir(struct vop_mkdir_args *ap)
1492{
1493	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
1494	struct componentname *cnp = ap->a_cnp;
1495	struct mqfs_node *pd = VTON(ap->a_dvp);
1496	struct mqfs_node *pn;
1497	int error;
1498
1499	if (pd->mn_type != mqfstype_root && pd->mn_type != mqfstype_dir)
1500		return (ENOTDIR);
1501	sx_xlock(&mqfs->mi_lock);
1502	if ((cnp->cn_flags & HASBUF) == 0)
1503		panic("%s: no name", __func__);
1504	pn = mqfs_create_dir(pd, cnp->cn_nameptr, cnp->cn_namelen,
1505		ap->a_vap->cn_cred, ap->a_vap->va_mode);
1506	if (pn != NULL)
1507		mqnode_addref(pn);
1508	sx_xunlock(&mqfs->mi_lock);
1509	if (pn == NULL) {
1510		error = ENOSPC;
1511	} else {
1512		error = mqfs_allocv(ap->a_dvp->v_mount, ap->a_vpp, pn);
1513		mqnode_release(pn);
1514	}
1515	return (error);
1516}
1517
1518#if 0
1519struct vop_rmdir_args {
1520	struct vnode *a_dvp;
1521	struct vnode *a_vp;
1522	struct componentname *a_cnp;
1523};
1524#endif
1525
/*
 * Remove a directory.
 *
 * Returns ENOTDIR if the node is not a plain directory, ENOENT if it was
 * already marked deleted, ENOTEMPTY if it has more than two children,
 * and 0 after unlinking it from its parent.
 */
static int
mqfs_rmdir(struct vop_rmdir_args *ap)
{
	struct mqfs_info *mqfs = VFSTOMQFS(ap->a_dvp->v_mount);
	struct mqfs_node *pn = VTON(ap->a_vp);
	struct mqfs_node *pt;

	if (pn->mn_type != mqfstype_dir)
		return (ENOTDIR);

	sx_xlock(&mqfs->mi_lock);
	if (pn->mn_deleted) {
		sx_xunlock(&mqfs->mi_lock);
		return (ENOENT);
	}

	/*
	 * Skip the first two children (presumably "." and ".." -- confirm)
	 * and treat any third child as "directory not empty".
	 * NOTE(review): the list walk dereferences the first two entries
	 * without a NULL check, so it relies on both always existing.
	 */
	pt = LIST_FIRST(&pn->mn_children);
	pt = LIST_NEXT(pt, mn_sibling);
	pt = LIST_NEXT(pt, mn_sibling);
	if (pt != NULL) {
		sx_xunlock(&mqfs->mi_lock);
		return (ENOTEMPTY);
	}
	/* Detach from the parent and drop one reference on each node. */
	pt = pn->mn_parent;
	pn->mn_parent = NULL;
	pn->mn_deleted = 1;
	LIST_REMOVE(pn, mn_sibling);
	mqnode_release(pn);
	mqnode_release(pt);
	sx_xunlock(&mqfs->mi_lock);
	cache_purge(ap->a_vp);
	return (0);
}
1562
1563#endif /* notyet */
1564
1565
1566/*
1567 * Set a destructor task with the prison's root
1568 */
1569static int
1570mqfs_prison_create(void *obj, void *data __unused)
1571{
1572	struct prison *pr = obj;
1573	struct mqfs_osd *mo;
1574	void *rsv;
1575
1576	if (pr->pr_root == pr->pr_parent->pr_root)
1577		return(0);
1578
1579	mo = malloc(sizeof(struct mqfs_osd), M_PRISON, M_WAITOK);
1580	rsv = osd_reserve(mqfs_osd_jail_slot);
1581	TASK_INIT(&mo->mo_task, 0, mqfs_prison_remove_task, mo);
1582	mtx_lock(&pr->pr_mtx);
1583	mo->mo_pr_root = pr->pr_root;
1584	(void)osd_jail_set_reserved(pr, mqfs_osd_jail_slot, rsv, mo);
1585	mtx_unlock(&pr->pr_mtx);
1586	return (0);
1587}
1588
1589/*
1590 * Queue the task for after jail/OSD locks are released
1591 */
1592static void
1593mqfs_prison_destructor(void *data)
1594{
1595	struct mqfs_osd *mo = data;
1596
1597	if (mqfs_osd_jail_slot != 0)
1598		taskqueue_enqueue(taskqueue_thread, &mo->mo_task);
1599	else
1600		free(mo, M_PRISON);
1601}
1602
1603/*
1604 * See if this prison root is obsolete, and clean up associated queues if it is
1605 */
1606static void
1607mqfs_prison_remove_task(void *context, int pending)
1608{
1609	struct mqfs_osd *mo = context;
1610	struct mqfs_node *pn, *tpn;
1611	const struct prison *pr;
1612	const void *pr_root;
1613	int found;
1614
1615	pr_root = mo->mo_pr_root;
1616	found = 0;
1617	sx_slock(&allprison_lock);
1618	TAILQ_FOREACH(pr, &allprison, pr_list) {
1619		if (pr->pr_root == pr_root)
1620			found = 1;
1621	}
1622	sx_sunlock(&allprison_lock);
1623	if (!found) {
1624		/*
1625		 * No jails are rooted in this directory anymore,
1626		 * so no queues should be either.
1627		 */
1628		sx_xlock(&mqfs_data.mi_lock);
1629		LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
1630		    mn_sibling, tpn) {
1631			if (pn->mn_pr_root == pr_root)
1632				(void)do_unlink(pn, curthread->td_ucred);
1633		}
1634		sx_xunlock(&mqfs_data.mi_lock);
1635	}
1636	free(mo, M_PRISON);
1637}
1638
1639
1640/*
1641 * Allocate a message queue
1642 */
1643static struct mqueue *
1644mqueue_alloc(const struct mq_attr *attr)
1645{
1646	struct mqueue *mq;
1647
1648	if (curmq >= maxmq)
1649		return (NULL);
1650	mq = uma_zalloc(mqueue_zone, M_WAITOK | M_ZERO);
1651	TAILQ_INIT(&mq->mq_msgq);
1652	if (attr != NULL) {
1653		mq->mq_maxmsg = attr->mq_maxmsg;
1654		mq->mq_msgsize = attr->mq_msgsize;
1655	} else {
1656		mq->mq_maxmsg = default_maxmsg;
1657		mq->mq_msgsize = default_msgsize;
1658	}
1659	mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
1660	knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
1661	knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
1662	atomic_add_int(&curmq, 1);
1663	return (mq);
1664}
1665
1666/*
1667 * Destroy a message queue
1668 */
1669static void
1670mqueue_free(struct mqueue *mq)
1671{
1672	struct mqueue_msg *msg;
1673
1674	while ((msg = TAILQ_FIRST(&mq->mq_msgq)) != NULL) {
1675		TAILQ_REMOVE(&mq->mq_msgq, msg, msg_link);
1676		free(msg, M_MQUEUEDATA);
1677	}
1678
1679	mtx_destroy(&mq->mq_mutex);
1680	seldrain(&mq->mq_rsel);
1681	seldrain(&mq->mq_wsel);
1682	knlist_destroy(&mq->mq_rsel.si_note);
1683	knlist_destroy(&mq->mq_wsel.si_note);
1684	uma_zfree(mqueue_zone, mq);
1685	atomic_add_int(&curmq, -1);
1686}
1687
1688/*
1689 * Load a message from user space
1690 */
1691static struct mqueue_msg *
1692mqueue_loadmsg(const char *msg_ptr, size_t msg_size, int msg_prio)
1693{
1694	struct mqueue_msg *msg;
1695	size_t len;
1696	int error;
1697
1698	len = sizeof(struct mqueue_msg) + msg_size;
1699	msg = malloc(len, M_MQUEUEDATA, M_WAITOK);
1700	error = copyin(msg_ptr, ((char *)msg) + sizeof(struct mqueue_msg),
1701	    msg_size);
1702	if (error) {
1703		free(msg, M_MQUEUEDATA);
1704		msg = NULL;
1705	} else {
1706		msg->msg_size = msg_size;
1707		msg->msg_prio = msg_prio;
1708	}
1709	return (msg);
1710}
1711
1712/*
1713 * Save a message to user space
1714 */
1715static int
1716mqueue_savemsg(struct mqueue_msg *msg, char *msg_ptr, int *msg_prio)
1717{
1718	int error;
1719
1720	error = copyout(((char *)msg) + sizeof(*msg), msg_ptr,
1721		msg->msg_size);
1722	if (error == 0 && msg_prio != NULL)
1723		error = copyout(&msg->msg_prio, msg_prio, sizeof(int));
1724	return (error);
1725}
1726
/*
 * Free a message's memory (header and payload share one M_MQUEUEDATA
 * allocation, so a single free() releases both).
 */
static __inline void
mqueue_freemsg(struct mqueue_msg *msg)
{
	free(msg, M_MQUEUEDATA);
}
1735
1736/*
1737 * Send a message. if waitok is false, thread will not be
1738 * blocked if there is no data in queue, otherwise, absolute
1739 * time will be checked.
1740 */
1741int
1742mqueue_send(struct mqueue *mq, const char *msg_ptr,
1743	size_t msg_len, unsigned msg_prio, int waitok,
1744	const struct timespec *abs_timeout)
1745{
1746	struct mqueue_msg *msg;
1747	struct timespec ts, ts2;
1748	struct timeval tv;
1749	int error;
1750
1751	if (msg_prio >= MQ_PRIO_MAX)
1752		return (EINVAL);
1753	if (msg_len > mq->mq_msgsize)
1754		return (EMSGSIZE);
1755	msg = mqueue_loadmsg(msg_ptr, msg_len, msg_prio);
1756	if (msg == NULL)
1757		return (EFAULT);
1758
1759	/* O_NONBLOCK case */
1760	if (!waitok) {
1761		error = _mqueue_send(mq, msg, -1);
1762		if (error)
1763			goto bad;
1764		return (0);
1765	}
1766
1767	/* we allow a null timeout (wait forever) */
1768	if (abs_timeout == NULL) {
1769		error = _mqueue_send(mq, msg, 0);
1770		if (error)
1771			goto bad;
1772		return (0);
1773	}
1774
1775	/* send it before checking time */
1776	error = _mqueue_send(mq, msg, -1);
1777	if (error == 0)
1778		return (0);
1779
1780	if (error != EAGAIN)
1781		goto bad;
1782
1783	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1784		error = EINVAL;
1785		goto bad;
1786	}
1787	for (;;) {
1788		ts2 = *abs_timeout;
1789		getnanotime(&ts);
1790		timespecsub(&ts2, &ts);
1791		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1792			error = ETIMEDOUT;
1793			break;
1794		}
1795		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1796		error = _mqueue_send(mq, msg, tvtohz(&tv));
1797		if (error != ETIMEDOUT)
1798			break;
1799	}
1800	if (error == 0)
1801		return (0);
1802bad:
1803	mqueue_freemsg(msg);
1804	return (error);
1805}
1806
/*
 * Common routine to send a message
 *
 * timo < 0 means "do not sleep": EAGAIN is returned if the queue is
 * full.  Otherwise timo is passed to msleep() as the sleep timeout.
 * Returns 0 once msg has been linked into the queue; on any non-zero
 * return msg has not been queued.
 */
static int
_mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, int timo)
{
	struct mqueue_msg *msg2;
	int error = 0;

	mtx_lock(&mq->mq_mutex);
	/* Wait for room, unless the caller asked not to sleep. */
	while (mq->mq_curmsgs >= mq->mq_maxmsg && error == 0) {
		if (timo < 0) {
			mtx_unlock(&mq->mq_mutex);
			return (EAGAIN);
		}
		mq->mq_senders++;
		error = msleep(&mq->mq_senders, &mq->mq_mutex,
			    PCATCH, "mqsend", timo);
		mq->mq_senders--;
		/* A sleep timeout is reported to callers as ETIMEDOUT. */
		if (error == EAGAIN)
			error = ETIMEDOUT;
	}
	/* Still full: the sleep ended with an error, hand it back. */
	if (mq->mq_curmsgs >= mq->mq_maxmsg) {
		mtx_unlock(&mq->mq_mutex);
		return (error);
	}
	error = 0;
	/*
	 * Insert so that higher priorities come first and messages of equal
	 * priority stay in arrival order.
	 */
	if (TAILQ_EMPTY(&mq->mq_msgq)) {
		TAILQ_INSERT_HEAD(&mq->mq_msgq, msg, msg_link);
	} else {
		if (msg->msg_prio <= TAILQ_LAST(&mq->mq_msgq, msgq)->msg_prio) {
			TAILQ_INSERT_TAIL(&mq->mq_msgq, msg, msg_link);
		} else {
			/* Find the first queued message with a lower priority. */
			TAILQ_FOREACH(msg2, &mq->mq_msgq, msg_link) {
				if (msg2->msg_prio < msg->msg_prio)
					break;
			}
			TAILQ_INSERT_BEFORE(msg2, msg, msg_link);
		}
	}
	mq->mq_curmsgs++;
	mq->mq_totalbytes += msg->msg_size;
	/* Prefer waking a sleeping receiver over firing the notifier. */
	if (mq->mq_receivers)
		wakeup_one(&mq->mq_receivers);
	else if (mq->mq_notifier != NULL)
		mqueue_send_notification(mq);
	if (mq->mq_flags & MQ_RSEL) {
		mq->mq_flags &= ~MQ_RSEL;
		selwakeup(&mq->mq_rsel);
	}
	KNOTE_LOCKED(&mq->mq_rsel.si_note, 0);
	mtx_unlock(&mq->mq_mutex);
	return (0);
}
1861
1862/*
1863 * Send realtime a signal to process which registered itself
1864 * successfully by mq_notify.
1865 */
1866static void
1867mqueue_send_notification(struct mqueue *mq)
1868{
1869	struct mqueue_notifier *nt;
1870	struct thread *td;
1871	struct proc *p;
1872	int error;
1873
1874	mtx_assert(&mq->mq_mutex, MA_OWNED);
1875	nt = mq->mq_notifier;
1876	if (nt->nt_sigev.sigev_notify != SIGEV_NONE) {
1877		p = nt->nt_proc;
1878		error = sigev_findtd(p, &nt->nt_sigev, &td);
1879		if (error) {
1880			mq->mq_notifier = NULL;
1881			return;
1882		}
1883		if (!KSI_ONQ(&nt->nt_ksi)) {
1884			ksiginfo_set_sigev(&nt->nt_ksi, &nt->nt_sigev);
1885			tdsendsignal(p, td, nt->nt_ksi.ksi_signo, &nt->nt_ksi);
1886		}
1887		PROC_UNLOCK(p);
1888	}
1889	mq->mq_notifier = NULL;
1890}
1891
1892/*
1893 * Get a message. if waitok is false, thread will not be
1894 * blocked if there is no data in queue, otherwise, absolute
1895 * time will be checked.
1896 */
1897int
1898mqueue_receive(struct mqueue *mq, char *msg_ptr,
1899	size_t msg_len, unsigned *msg_prio, int waitok,
1900	const struct timespec *abs_timeout)
1901{
1902	struct mqueue_msg *msg;
1903	struct timespec ts, ts2;
1904	struct timeval tv;
1905	int error;
1906
1907	if (msg_len < mq->mq_msgsize)
1908		return (EMSGSIZE);
1909
1910	/* O_NONBLOCK case */
1911	if (!waitok) {
1912		error = _mqueue_recv(mq, &msg, -1);
1913		if (error)
1914			return (error);
1915		goto received;
1916	}
1917
1918	/* we allow a null timeout (wait forever). */
1919	if (abs_timeout == NULL) {
1920		error = _mqueue_recv(mq, &msg, 0);
1921		if (error)
1922			return (error);
1923		goto received;
1924	}
1925
1926	/* try to get a message before checking time */
1927	error = _mqueue_recv(mq, &msg, -1);
1928	if (error == 0)
1929		goto received;
1930
1931	if (error != EAGAIN)
1932		return (error);
1933
1934	if (abs_timeout->tv_nsec >= 1000000000 || abs_timeout->tv_nsec < 0) {
1935		error = EINVAL;
1936		return (error);
1937	}
1938
1939	for (;;) {
1940		ts2 = *abs_timeout;
1941		getnanotime(&ts);
1942		timespecsub(&ts2, &ts);
1943		if (ts2.tv_sec < 0 || (ts2.tv_sec == 0 && ts2.tv_nsec <= 0)) {
1944			error = ETIMEDOUT;
1945			return (error);
1946		}
1947		TIMESPEC_TO_TIMEVAL(&tv, &ts2);
1948		error = _mqueue_recv(mq, &msg, tvtohz(&tv));
1949		if (error == 0)
1950			break;
1951		if (error != ETIMEDOUT)
1952			return (error);
1953	}
1954
1955received:
1956	error = mqueue_savemsg(msg, msg_ptr, msg_prio);
1957	if (error == 0) {
1958		curthread->td_retval[0] = msg->msg_size;
1959		curthread->td_retval[1] = 0;
1960	}
1961	mqueue_freemsg(msg);
1962	return (error);
1963}
1964
/*
 * Common routine to receive a message
 *
 * timo < 0 means "do not sleep": EAGAIN is returned if the queue is
 * empty.  Otherwise timo is passed to msleep() as the sleep timeout.
 * On a 0 return *msg is the message removed from the head of the queue;
 * on a non-zero return *msg is NULL.
 */
static int
_mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo)
{
	int error = 0;

	mtx_lock(&mq->mq_mutex);
	/* Wait for a message, unless the caller asked not to sleep. */
	while ((*msg = TAILQ_FIRST(&mq->mq_msgq)) == NULL && error == 0) {
		if (timo < 0) {
			mtx_unlock(&mq->mq_mutex);
			return (EAGAIN);
		}
		mq->mq_receivers++;
		error = msleep(&mq->mq_receivers, &mq->mq_mutex,
			    PCATCH, "mqrecv", timo);
		mq->mq_receivers--;
		/* A sleep timeout is reported to callers as ETIMEDOUT. */
		if (error == EAGAIN)
			error = ETIMEDOUT;
	}
	if (*msg != NULL) {
		/* A message is available: any sleep error is discarded. */
		error = 0;
		TAILQ_REMOVE(&mq->mq_msgq, *msg, msg_link);
		mq->mq_curmsgs--;
		mq->mq_totalbytes -= (*msg)->msg_size;
		/* A slot was freed, so let one blocked sender proceed. */
		if (mq->mq_senders)
			wakeup_one(&mq->mq_senders);
		if (mq->mq_flags & MQ_WSEL) {
			mq->mq_flags &= ~MQ_WSEL;
			selwakeup(&mq->mq_wsel);
		}
		KNOTE_LOCKED(&mq->mq_wsel.si_note, 0);
	}
	/*
	 * With no receivers left sleeping and messages still queued, fire
	 * any registered notifier.
	 */
	if (mq->mq_notifier != NULL && mq->mq_receivers == 0 &&
	    !TAILQ_EMPTY(&mq->mq_msgq)) {
		mqueue_send_notification(mq);
	}
	mtx_unlock(&mq->mq_mutex);
	return (error);
}
2006
/*
 * Allocate a zeroed notifier from mqnoti_zone (M_WAITOK allocation).
 */
static __inline struct mqueue_notifier *
notifier_alloc(void)
{
	return (uma_zalloc(mqnoti_zone, M_WAITOK | M_ZERO));
}
2012
/*
 * Return a notifier to mqnoti_zone.
 */
static __inline void
notifier_free(struct mqueue_notifier *p)
{
	uma_zfree(mqnoti_zone, p);
}
2018
2019static struct mqueue_notifier *
2020notifier_search(struct proc *p, int fd)
2021{
2022	struct mqueue_notifier *nt;
2023
2024	LIST_FOREACH(nt, &p->p_mqnotifier, nt_link) {
2025		if (nt->nt_ksi.ksi_mqd == fd)
2026			break;
2027	}
2028	return (nt);
2029}
2030
/*
 * Link a notifier at the head of the process's notifier list.
 */
static __inline void
notifier_insert(struct proc *p, struct mqueue_notifier *nt)
{
	LIST_INSERT_HEAD(&p->p_mqnotifier, nt, nt_link);
}
2036
/*
 * Unlink a notifier from the list it is on and free it.  The p argument
 * is not used by the body.
 */
static __inline void
notifier_delete(struct proc *p, struct mqueue_notifier *nt)
{
	LIST_REMOVE(nt, nt_link);
	notifier_free(nt);
}
2043
2044static void
2045notifier_remove(struct proc *p, struct mqueue *mq, int fd)
2046{
2047	struct mqueue_notifier *nt;
2048
2049	mtx_assert(&mq->mq_mutex, MA_OWNED);
2050	PROC_LOCK(p);
2051	nt = notifier_search(p, fd);
2052	if (nt != NULL) {
2053		if (mq->mq_notifier == nt)
2054			mq->mq_notifier = NULL;
2055		sigqueue_take(&nt->nt_ksi);
2056		notifier_delete(p, nt);
2057	}
2058	PROC_UNLOCK(p);
2059}
2060
/*
 * Open (and optionally create) the message queue named by upath and
 * return a new descriptor for it in td_retval[0].
 *
 * Returns EINVAL for out-of-range creation attributes or a malformed
 * name, ENOENT when the queue does not exist and O_CREAT is not set,
 * EEXIST for O_CREAT|O_EXCL on an existing queue, ENFILE when no queue
 * can be allocated, ENOSPC when the node cannot be created, or the error
 * from copyinstr(), falloc() or vaccess().
 */
static int
kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
    const struct mq_attr *attr)
{
	char path[MQFS_NAMELEN + 1];
	struct mqfs_node *pn;
	struct filedesc *fdp;
	struct file *fp;
	struct mqueue *mq;
	int fd, error, len, cmode;

	fdp = td->td_proc->p_fd;
	/* Creation mode: apply the umask, keep permission bits, drop sticky. */
	cmode = (((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT);
	mq = NULL;
	/* Attributes are only examined when a queue may be created. */
	if ((flags & O_CREAT) != 0 && attr != NULL) {
		if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg)
			return (EINVAL);
		if (attr->mq_msgsize <= 0 || attr->mq_msgsize > maxmsgsize)
			return (EINVAL);
	}

	error = copyinstr(upath, path, MQFS_NAMELEN + 1, NULL);
        if (error)
		return (error);

	/*
	 * The first character of name must be a slash  (/) character
	 * and the remaining characters of name cannot include any slash
	 * characters.
	 */
	len = strlen(path);
	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
		return (EINVAL);

	/* Reserve the descriptor first; it is closed again on any error. */
	error = falloc(td, &fp, &fd, O_CLOEXEC);
	if (error)
		return (error);

	sx_xlock(&mqfs_data.mi_lock);
	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
	if (pn == NULL) {
		/* No such queue: create it if asked to. */
		if (!(flags & O_CREAT)) {
			error = ENOENT;
		} else {
			mq = mqueue_alloc(attr);
			if (mq == NULL) {
				error = ENFILE;
			} else {
				pn = mqfs_create_file(mqfs_data.mi_root,
				         path + 1, len - 1, td->td_ucred,
					 cmode);
				if (pn == NULL) {
					error = ENOSPC;
					mqueue_free(mq);
				}
			}
		}

		if (error == 0) {
			pn->mn_data = mq;
		}
	} else {
		/* Existing queue: honour O_EXCL, else check access rights. */
		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) {
			error = EEXIST;
		} else {
			accmode_t accmode = 0;

			if (flags & FREAD)
				accmode |= VREAD;
			if (flags & FWRITE)
				accmode |= VWRITE;
			error = vaccess(VREG, pn->mn_mode, pn->mn_uid,
				    pn->mn_gid, accmode, td->td_ucred, NULL);
		}
	}

	if (error) {
		sx_xunlock(&mqfs_data.mi_lock);
		fdclose(fdp, fp, fd, td);
		fdrop(fp, td);
		return (error);
	}

	/* The file keeps its own reference on the node. */
	mqnode_addref(pn);
	sx_xunlock(&mqfs_data.mi_lock);

	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
	    &mqueueops);

	td->td_retval[0] = fd;
	fdrop(fp, td);
	return (0);
}
2154
2155/*
2156 * Syscall to open a message queue.
2157 */
2158int
2159sys_kmq_open(struct thread *td, struct kmq_open_args *uap)
2160{
2161	struct mq_attr attr;
2162	int flags, error;
2163
2164	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2165		return (EINVAL);
2166	flags = FFLAGS(uap->flags);
2167	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2168		error = copyin(uap->attr, &attr, sizeof(attr));
2169		if (error)
2170			return (error);
2171	}
2172	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2173	    uap->attr != NULL ? &attr : NULL));
2174}
2175
2176/*
2177 * Syscall to unlink a message queue.
2178 */
2179int
2180sys_kmq_unlink(struct thread *td, struct kmq_unlink_args *uap)
2181{
2182	char path[MQFS_NAMELEN+1];
2183	struct mqfs_node *pn;
2184	int error, len;
2185
2186	error = copyinstr(uap->path, path, MQFS_NAMELEN + 1, NULL);
2187        if (error)
2188		return (error);
2189
2190	len = strlen(path);
2191	if (len < 2 || path[0] != '/' || strchr(path + 1, '/') != NULL)
2192		return (EINVAL);
2193
2194	sx_xlock(&mqfs_data.mi_lock);
2195	pn = mqfs_search(mqfs_data.mi_root, path + 1, len - 1, td->td_ucred);
2196	if (pn != NULL)
2197		error = do_unlink(pn, td->td_ucred);
2198	else
2199		error = ENOENT;
2200	sx_xunlock(&mqfs_data.mi_lock);
2201	return (error);
2202}
2203
2204typedef int (*_fgetf)(struct thread *, int, cap_rights_t *, struct file **);
2205
2206/*
2207 * Get message queue by giving file slot
2208 */
2209static int
2210_getmq(struct thread *td, int fd, cap_rights_t *rightsp, _fgetf func,
2211       struct file **fpp, struct mqfs_node **ppn, struct mqueue **pmq)
2212{
2213	struct mqfs_node *pn;
2214	int error;
2215
2216	error = func(td, fd, rightsp, fpp);
2217	if (error)
2218		return (error);
2219	if (&mqueueops != (*fpp)->f_ops) {
2220		fdrop(*fpp, td);
2221		return (EBADF);
2222	}
2223	pn = (*fpp)->f_data;
2224	if (ppn)
2225		*ppn = pn;
2226	if (pmq)
2227		*pmq = pn->mn_data;
2228	return (0);
2229}
2230
2231static __inline int
2232getmq(struct thread *td, int fd, struct file **fpp, struct mqfs_node **ppn,
2233	struct mqueue **pmq)
2234{
2235	cap_rights_t rights;
2236
2237	return _getmq(td, fd, cap_rights_init(&rights, CAP_EVENT), fget,
2238	    fpp, ppn, pmq);
2239}
2240
2241static __inline int
2242getmq_read(struct thread *td, int fd, struct file **fpp,
2243	 struct mqfs_node **ppn, struct mqueue **pmq)
2244{
2245	cap_rights_t rights;
2246
2247	return _getmq(td, fd, cap_rights_init(&rights, CAP_READ), fget_read,
2248	    fpp, ppn, pmq);
2249}
2250
2251static __inline int
2252getmq_write(struct thread *td, int fd, struct file **fpp,
2253	struct mqfs_node **ppn, struct mqueue **pmq)
2254{
2255	cap_rights_t rights;
2256
2257	return _getmq(td, fd, cap_rights_init(&rights, CAP_WRITE), fget_write,
2258	    fpp, ppn, pmq);
2259}
2260
2261static int
2262kern_kmq_setattr(struct thread *td, int mqd, const struct mq_attr *attr,
2263    struct mq_attr *oattr)
2264{
2265	struct mqueue *mq;
2266	struct file *fp;
2267	u_int oflag, flag;
2268	int error;
2269
2270	if (attr != NULL && (attr->mq_flags & ~O_NONBLOCK) != 0)
2271		return (EINVAL);
2272	error = getmq(td, mqd, &fp, NULL, &mq);
2273	if (error)
2274		return (error);
2275	oattr->mq_maxmsg  = mq->mq_maxmsg;
2276	oattr->mq_msgsize = mq->mq_msgsize;
2277	oattr->mq_curmsgs = mq->mq_curmsgs;
2278	if (attr != NULL) {
2279		do {
2280			oflag = flag = fp->f_flag;
2281			flag &= ~O_NONBLOCK;
2282			flag |= (attr->mq_flags & O_NONBLOCK);
2283		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
2284	} else
2285		oflag = fp->f_flag;
2286	oattr->mq_flags = (O_NONBLOCK & oflag);
2287	fdrop(fp, td);
2288	return (error);
2289}
2290
2291int
2292sys_kmq_setattr(struct thread *td, struct kmq_setattr_args *uap)
2293{
2294	struct mq_attr attr, oattr;
2295	int error;
2296
2297	if (uap->attr != NULL) {
2298		error = copyin(uap->attr, &attr, sizeof(attr));
2299		if (error != 0)
2300			return (error);
2301	}
2302	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2303	    &oattr);
2304	if (error != 0)
2305		return (error);
2306	if (uap->oattr != NULL)
2307		error = copyout(&oattr, uap->oattr, sizeof(oattr));
2308	return (error);
2309}
2310
2311int
2312sys_kmq_timedreceive(struct thread *td, struct kmq_timedreceive_args *uap)
2313{
2314	struct mqueue *mq;
2315	struct file *fp;
2316	struct timespec *abs_timeout, ets;
2317	int error;
2318	int waitok;
2319
2320	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2321	if (error)
2322		return (error);
2323	if (uap->abs_timeout != NULL) {
2324		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2325		if (error != 0)
2326			return (error);
2327		abs_timeout = &ets;
2328	} else
2329		abs_timeout = NULL;
2330	waitok = !(fp->f_flag & O_NONBLOCK);
2331	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2332		uap->msg_prio, waitok, abs_timeout);
2333	fdrop(fp, td);
2334	return (error);
2335}
2336
2337int
2338sys_kmq_timedsend(struct thread *td, struct kmq_timedsend_args *uap)
2339{
2340	struct mqueue *mq;
2341	struct file *fp;
2342	struct timespec *abs_timeout, ets;
2343	int error, waitok;
2344
2345	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2346	if (error)
2347		return (error);
2348	if (uap->abs_timeout != NULL) {
2349		error = copyin(uap->abs_timeout, &ets, sizeof(ets));
2350		if (error != 0)
2351			return (error);
2352		abs_timeout = &ets;
2353	} else
2354		abs_timeout = NULL;
2355	waitok = !(fp->f_flag & O_NONBLOCK);
2356	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2357		uap->msg_prio, waitok, abs_timeout);
2358	fdrop(fp, td);
2359	return (error);
2360}
2361
/*
 * Register (sigev != NULL) or remove (sigev == NULL) the calling
 * process's notification request on the queue behind descriptor mqd.
 *
 * Returns EINVAL for an unsupported sigev_notify value or an invalid
 * signal number, EBADF if the descriptor no longer refers to the same
 * file, EBUSY if the queue already has a notifier, or the error from
 * getmq()/cap_check().
 */
static int
kern_kmq_notify(struct thread *td, int mqd, struct sigevent *sigev)
{
#ifdef CAPABILITIES
	cap_rights_t rights;
#endif
	struct filedesc *fdp;
	struct proc *p;
	struct mqueue *mq;
	struct file *fp, *fp2;
	struct mqueue_notifier *nt, *newnt = NULL;
	int error;

	if (sigev != NULL) {
		if (sigev->sigev_notify != SIGEV_SIGNAL &&
		    sigev->sigev_notify != SIGEV_THREAD_ID &&
		    sigev->sigev_notify != SIGEV_NONE)
			return (EINVAL);
		if ((sigev->sigev_notify == SIGEV_SIGNAL ||
		    sigev->sigev_notify == SIGEV_THREAD_ID) &&
		    !_SIG_VALID(sigev->sigev_signo))
			return (EINVAL);
	}
	p = td->td_proc;
	fdp = td->td_proc->p_fd;
	error = getmq(td, mqd, &fp, NULL, &mq);
	if (error)
		return (error);
	/*
	 * Re-entered after the locks were dropped to allocate a notifier;
	 * the descriptor is therefore re-validated on every pass.
	 */
again:
	FILEDESC_SLOCK(fdp);
	fp2 = fget_locked(fdp, mqd);
	if (fp2 == NULL) {
		FILEDESC_SUNLOCK(fdp);
		error = EBADF;
		goto out;
	}
#ifdef CAPABILITIES
	error = cap_check(cap_rights(fdp, mqd),
	    cap_rights_init(&rights, CAP_EVENT));
	if (error) {
		FILEDESC_SUNLOCK(fdp);
		goto out;
	}
#endif
	/* The slot must still hold the file that getmq() returned. */
	if (fp2 != fp) {
		FILEDESC_SUNLOCK(fdp);
		error = EBADF;
		goto out;
	}
	mtx_lock(&mq->mq_mutex);
	FILEDESC_SUNLOCK(fdp);
	if (sigev != NULL) {
		if (mq->mq_notifier != NULL) {
			error = EBUSY;
		} else {
			PROC_LOCK(p);
			nt = notifier_search(p, mqd);
			if (nt == NULL) {
				/*
				 * A new notifier is needed; allocate it with
				 * both locks released, then start over.
				 */
				if (newnt == NULL) {
					PROC_UNLOCK(p);
					mtx_unlock(&mq->mq_mutex);
					newnt = notifier_alloc();
					goto again;
				}
			}

			if (nt != NULL) {
				/* Reuse the existing notifier for this fd. */
				sigqueue_take(&nt->nt_ksi);
				if (newnt != NULL) {
					notifier_free(newnt);
					newnt = NULL;
				}
			} else {
				/* Install the preallocated notifier. */
				nt = newnt;
				newnt = NULL;
				ksiginfo_init(&nt->nt_ksi);
				nt->nt_ksi.ksi_flags |= KSI_INS | KSI_EXT;
				nt->nt_ksi.ksi_code = SI_MESGQ;
				nt->nt_proc = p;
				nt->nt_ksi.ksi_mqd = mqd;
				notifier_insert(p, nt);
			}
			nt->nt_sigev = *sigev;
			mq->mq_notifier = nt;
			PROC_UNLOCK(p);
			/*
			 * if there is no receivers and message queue
			 * is not empty, we should send notification
			 * as soon as possible.
			 */
			if (mq->mq_receivers == 0 &&
			    !TAILQ_EMPTY(&mq->mq_msgq))
				mqueue_send_notification(mq);
		}
	} else {
		notifier_remove(p, mq, mqd);
	}
	mtx_unlock(&mq->mq_mutex);

out:
	fdrop(fp, td);
	/* Free a preallocated notifier that ended up unused. */
	if (newnt != NULL)
		notifier_free(newnt);
	return (error);
}
2467
2468int
2469sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap)
2470{
2471	struct sigevent ev, *evp;
2472	int error;
2473
2474	if (uap->sigev == NULL) {
2475		evp = NULL;
2476	} else {
2477		error = copyin(uap->sigev, &ev, sizeof(ev));
2478		if (error != 0)
2479			return (error);
2480		evp = &ev;
2481	}
2482	return (kern_kmq_notify(td, uap->mqd, evp));
2483}
2484
2485static void
2486mqueue_fdclose(struct thread *td, int fd, struct file *fp)
2487{
2488	struct filedesc *fdp;
2489	struct mqueue *mq;
2490
2491	fdp = td->td_proc->p_fd;
2492	FILEDESC_LOCK_ASSERT(fdp);
2493
2494	if (fp->f_ops == &mqueueops) {
2495		mq = FPTOMQ(fp);
2496		mtx_lock(&mq->mq_mutex);
2497		notifier_remove(td->td_proc, mq, fd);
2498
2499		/* have to wakeup thread in same process */
2500		if (mq->mq_flags & MQ_RSEL) {
2501			mq->mq_flags &= ~MQ_RSEL;
2502			selwakeup(&mq->mq_rsel);
2503		}
2504		if (mq->mq_flags & MQ_WSEL) {
2505			mq->mq_flags &= ~MQ_WSEL;
2506			selwakeup(&mq->mq_wsel);
2507		}
2508		mtx_unlock(&mq->mq_mutex);
2509	}
2510}
2511
2512static void
2513mq_proc_exit(void *arg __unused, struct proc *p)
2514{
2515	struct filedesc *fdp;
2516	struct file *fp;
2517	struct mqueue *mq;
2518	int i;
2519
2520	fdp = p->p_fd;
2521	FILEDESC_SLOCK(fdp);
2522	for (i = 0; i < fdp->fd_nfiles; ++i) {
2523		fp = fget_locked(fdp, i);
2524		if (fp != NULL && fp->f_ops == &mqueueops) {
2525			mq = FPTOMQ(fp);
2526			mtx_lock(&mq->mq_mutex);
2527			notifier_remove(p, FPTOMQ(fp), i);
2528			mtx_unlock(&mq->mq_mutex);
2529		}
2530	}
2531	FILEDESC_SUNLOCK(fdp);
2532	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
2533}
2534
/*
 * fo_read method for message queue descriptors.
 *
 * Plain reads are not supported on a message queue file; the call always
 * fails with EOPNOTSUPP and none of the arguments are examined.
 */
static int
mqf_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
2541
/*
 * fo_write method for message queue descriptors.
 *
 * Plain writes are not supported on a message queue file; the call
 * always fails with EOPNOTSUPP and none of the arguments are examined.
 */
static int
mqf_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
2548
/*
 * fo_truncate method for message queue descriptors.
 *
 * A message queue file cannot be truncated; the call always fails with
 * EINVAL regardless of the requested length.
 */
static int
mqf_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

	return (EINVAL);
}
2556
2557static int
2558mqf_ioctl(struct file *fp, u_long cmd, void *data,
2559	struct ucred *active_cred, struct thread *td)
2560{
2561	return (ENOTTY);
2562}
2563
2564static int
2565mqf_poll(struct file *fp, int events, struct ucred *active_cred,
2566	struct thread *td)
2567{
2568	struct mqueue *mq = FPTOMQ(fp);
2569	int revents = 0;
2570
2571	mtx_lock(&mq->mq_mutex);
2572	if (events & (POLLIN | POLLRDNORM)) {
2573		if (mq->mq_curmsgs) {
2574			revents |= events & (POLLIN | POLLRDNORM);
2575		} else {
2576			mq->mq_flags |= MQ_RSEL;
2577			selrecord(td, &mq->mq_rsel);
2578 		}
2579	}
2580	if (events & POLLOUT) {
2581		if (mq->mq_curmsgs < mq->mq_maxmsg)
2582			revents |= POLLOUT;
2583		else {
2584			mq->mq_flags |= MQ_WSEL;
2585			selrecord(td, &mq->mq_wsel);
2586		}
2587	}
2588	mtx_unlock(&mq->mq_mutex);
2589	return (revents);
2590}
2591
2592static int
2593mqf_close(struct file *fp, struct thread *td)
2594{
2595	struct mqfs_node *pn;
2596
2597	fp->f_ops = &badfileops;
2598	pn = fp->f_data;
2599	fp->f_data = NULL;
2600	sx_xlock(&mqfs_data.mi_lock);
2601	mqnode_release(pn);
2602	sx_xunlock(&mqfs_data.mi_lock);
2603	return (0);
2604}
2605
2606static int
2607mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
2608	struct thread *td)
2609{
2610	struct mqfs_node *pn = fp->f_data;
2611
2612	bzero(st, sizeof *st);
2613	sx_xlock(&mqfs_data.mi_lock);
2614	st->st_atim = pn->mn_atime;
2615	st->st_mtim = pn->mn_mtime;
2616	st->st_ctim = pn->mn_ctime;
2617	st->st_birthtim = pn->mn_birth;
2618	st->st_uid = pn->mn_uid;
2619	st->st_gid = pn->mn_gid;
2620	st->st_mode = S_IFIFO | pn->mn_mode;
2621	sx_xunlock(&mqfs_data.mi_lock);
2622	return (0);
2623}
2624
2625static int
2626mqf_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
2627    struct thread *td)
2628{
2629	struct mqfs_node *pn;
2630	int error;
2631
2632	error = 0;
2633	pn = fp->f_data;
2634	sx_xlock(&mqfs_data.mi_lock);
2635	error = vaccess(VREG, pn->mn_mode, pn->mn_uid, pn->mn_gid, VADMIN,
2636	    active_cred, NULL);
2637	if (error != 0)
2638		goto out;
2639	pn->mn_mode = mode & ACCESSPERMS;
2640out:
2641	sx_xunlock(&mqfs_data.mi_lock);
2642	return (error);
2643}
2644
2645static int
2646mqf_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
2647    struct thread *td)
2648{
2649	struct mqfs_node *pn;
2650	int error;
2651
2652	error = 0;
2653	pn = fp->f_data;
2654	sx_xlock(&mqfs_data.mi_lock);
2655	if (uid == (uid_t)-1)
2656		uid = pn->mn_uid;
2657	if (gid == (gid_t)-1)
2658		gid = pn->mn_gid;
2659	if (((uid != pn->mn_uid && uid != active_cred->cr_uid) ||
2660	    (gid != pn->mn_gid && !groupmember(gid, active_cred))) &&
2661	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
2662		goto out;
2663	pn->mn_uid = uid;
2664	pn->mn_gid = gid;
2665out:
2666	sx_xunlock(&mqfs_data.mi_lock);
2667	return (error);
2668}
2669
2670static int
2671mqf_kqfilter(struct file *fp, struct knote *kn)
2672{
2673	struct mqueue *mq = FPTOMQ(fp);
2674	int error = 0;
2675
2676	if (kn->kn_filter == EVFILT_READ) {
2677		kn->kn_fop = &mq_rfiltops;
2678		knlist_add(&mq->mq_rsel.si_note, kn, 0);
2679	} else if (kn->kn_filter == EVFILT_WRITE) {
2680		kn->kn_fop = &mq_wfiltops;
2681		knlist_add(&mq->mq_wsel.si_note, kn, 0);
2682	} else
2683		error = EINVAL;
2684	return (error);
2685}
2686
2687static void
2688filt_mqdetach(struct knote *kn)
2689{
2690	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2691
2692	if (kn->kn_filter == EVFILT_READ)
2693		knlist_remove(&mq->mq_rsel.si_note, kn, 0);
2694	else if (kn->kn_filter == EVFILT_WRITE)
2695		knlist_remove(&mq->mq_wsel.si_note, kn, 0);
2696	else
2697		panic("filt_mqdetach");
2698}
2699
2700static int
2701filt_mqread(struct knote *kn, long hint)
2702{
2703	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2704
2705	mtx_assert(&mq->mq_mutex, MA_OWNED);
2706	return (mq->mq_curmsgs != 0);
2707}
2708
2709static int
2710filt_mqwrite(struct knote *kn, long hint)
2711{
2712	struct mqueue *mq = FPTOMQ(kn->kn_fp);
2713
2714	mtx_assert(&mq->mq_mutex, MA_OWNED);
2715	return (mq->mq_curmsgs < mq->mq_maxmsg);
2716}
2717
2718static struct fileops mqueueops = {
2719	.fo_read		= mqf_read,
2720	.fo_write		= mqf_write,
2721	.fo_truncate		= mqf_truncate,
2722	.fo_ioctl		= mqf_ioctl,
2723	.fo_poll		= mqf_poll,
2724	.fo_kqfilter		= mqf_kqfilter,
2725	.fo_stat		= mqf_stat,
2726	.fo_chmod		= mqf_chmod,
2727	.fo_chown		= mqf_chown,
2728	.fo_close		= mqf_close,
2729	.fo_sendfile		= invfo_sendfile,
2730};
2731
2732static struct vop_vector mqfs_vnodeops = {
2733	.vop_default 		= &default_vnodeops,
2734	.vop_access		= mqfs_access,
2735	.vop_cachedlookup	= mqfs_lookup,
2736	.vop_lookup		= vfs_cache_lookup,
2737	.vop_reclaim		= mqfs_reclaim,
2738	.vop_create		= mqfs_create,
2739	.vop_remove		= mqfs_remove,
2740	.vop_inactive		= mqfs_inactive,
2741	.vop_open		= mqfs_open,
2742	.vop_close		= mqfs_close,
2743	.vop_getattr		= mqfs_getattr,
2744	.vop_setattr		= mqfs_setattr,
2745	.vop_read		= mqfs_read,
2746	.vop_write		= VOP_EOPNOTSUPP,
2747	.vop_readdir		= mqfs_readdir,
2748	.vop_mkdir		= VOP_EOPNOTSUPP,
2749	.vop_rmdir		= VOP_EOPNOTSUPP
2750};
2751
2752static struct vfsops mqfs_vfsops = {
2753	.vfs_init 		= mqfs_init,
2754	.vfs_uninit		= mqfs_uninit,
2755	.vfs_mount		= mqfs_mount,
2756	.vfs_unmount		= mqfs_unmount,
2757	.vfs_root		= mqfs_root,
2758	.vfs_statfs		= mqfs_statfs,
2759};
2760
2761static struct vfsconf mqueuefs_vfsconf = {
2762	.vfc_version = VFS_VERSION,
2763	.vfc_name = "mqueuefs",
2764	.vfc_vfsops = &mqfs_vfsops,
2765	.vfc_typenum = -1,
2766	.vfc_flags = VFCF_SYNTHETIC
2767};
2768
2769static struct syscall_helper_data mq_syscalls[] = {
2770	SYSCALL_INIT_HELPER(kmq_open),
2771	SYSCALL_INIT_HELPER(kmq_setattr),
2772	SYSCALL_INIT_HELPER(kmq_timedsend),
2773	SYSCALL_INIT_HELPER(kmq_timedreceive),
2774	SYSCALL_INIT_HELPER(kmq_notify),
2775	SYSCALL_INIT_HELPER(kmq_unlink),
2776	SYSCALL_INIT_LAST
2777};
2778
2779#ifdef COMPAT_FREEBSD32
2780#include <compat/freebsd32/freebsd32.h>
2781#include <compat/freebsd32/freebsd32_proto.h>
2782#include <compat/freebsd32/freebsd32_signal.h>
2783#include <compat/freebsd32/freebsd32_syscall.h>
2784#include <compat/freebsd32/freebsd32_util.h>
2785
2786static void
2787mq_attr_from32(const struct mq_attr32 *from, struct mq_attr *to)
2788{
2789
2790	to->mq_flags = from->mq_flags;
2791	to->mq_maxmsg = from->mq_maxmsg;
2792	to->mq_msgsize = from->mq_msgsize;
2793	to->mq_curmsgs = from->mq_curmsgs;
2794}
2795
2796static void
2797mq_attr_to32(const struct mq_attr *from, struct mq_attr32 *to)
2798{
2799
2800	to->mq_flags = from->mq_flags;
2801	to->mq_maxmsg = from->mq_maxmsg;
2802	to->mq_msgsize = from->mq_msgsize;
2803	to->mq_curmsgs = from->mq_curmsgs;
2804}
2805
2806int
2807freebsd32_kmq_open(struct thread *td, struct freebsd32_kmq_open_args *uap)
2808{
2809	struct mq_attr attr;
2810	struct mq_attr32 attr32;
2811	int flags, error;
2812
2813	if ((uap->flags & O_ACCMODE) == O_ACCMODE || uap->flags & O_EXEC)
2814		return (EINVAL);
2815	flags = FFLAGS(uap->flags);
2816	if ((flags & O_CREAT) != 0 && uap->attr != NULL) {
2817		error = copyin(uap->attr, &attr32, sizeof(attr32));
2818		if (error)
2819			return (error);
2820		mq_attr_from32(&attr32, &attr);
2821	}
2822	return (kern_kmq_open(td, uap->path, flags, uap->mode,
2823	    uap->attr != NULL ? &attr : NULL));
2824}
2825
2826int
2827freebsd32_kmq_setattr(struct thread *td, struct freebsd32_kmq_setattr_args *uap)
2828{
2829	struct mq_attr attr, oattr;
2830	struct mq_attr32 attr32, oattr32;
2831	int error;
2832
2833	if (uap->attr != NULL) {
2834		error = copyin(uap->attr, &attr32, sizeof(attr32));
2835		if (error != 0)
2836			return (error);
2837		mq_attr_from32(&attr32, &attr);
2838	}
2839	error = kern_kmq_setattr(td, uap->mqd, uap->attr != NULL ? &attr : NULL,
2840	    &oattr);
2841	if (error != 0)
2842		return (error);
2843	if (uap->oattr != NULL) {
2844		mq_attr_to32(&oattr, &oattr32);
2845		error = copyout(&oattr32, uap->oattr, sizeof(oattr32));
2846	}
2847	return (error);
2848}
2849
2850int
2851freebsd32_kmq_timedsend(struct thread *td,
2852    struct freebsd32_kmq_timedsend_args *uap)
2853{
2854	struct mqueue *mq;
2855	struct file *fp;
2856	struct timespec32 ets32;
2857	struct timespec *abs_timeout, ets;
2858	int error;
2859	int waitok;
2860
2861	error = getmq_write(td, uap->mqd, &fp, NULL, &mq);
2862	if (error)
2863		return (error);
2864	if (uap->abs_timeout != NULL) {
2865		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2866		if (error != 0)
2867			return (error);
2868		CP(ets32, ets, tv_sec);
2869		CP(ets32, ets, tv_nsec);
2870		abs_timeout = &ets;
2871	} else
2872		abs_timeout = NULL;
2873	waitok = !(fp->f_flag & O_NONBLOCK);
2874	error = mqueue_send(mq, uap->msg_ptr, uap->msg_len,
2875		uap->msg_prio, waitok, abs_timeout);
2876	fdrop(fp, td);
2877	return (error);
2878}
2879
2880int
2881freebsd32_kmq_timedreceive(struct thread *td,
2882    struct freebsd32_kmq_timedreceive_args *uap)
2883{
2884	struct mqueue *mq;
2885	struct file *fp;
2886	struct timespec32 ets32;
2887	struct timespec *abs_timeout, ets;
2888	int error, waitok;
2889
2890	error = getmq_read(td, uap->mqd, &fp, NULL, &mq);
2891	if (error)
2892		return (error);
2893	if (uap->abs_timeout != NULL) {
2894		error = copyin(uap->abs_timeout, &ets32, sizeof(ets32));
2895		if (error != 0)
2896			return (error);
2897		CP(ets32, ets, tv_sec);
2898		CP(ets32, ets, tv_nsec);
2899		abs_timeout = &ets;
2900	} else
2901		abs_timeout = NULL;
2902	waitok = !(fp->f_flag & O_NONBLOCK);
2903	error = mqueue_receive(mq, uap->msg_ptr, uap->msg_len,
2904		uap->msg_prio, waitok, abs_timeout);
2905	fdrop(fp, td);
2906	return (error);
2907}
2908
2909int
2910freebsd32_kmq_notify(struct thread *td, struct freebsd32_kmq_notify_args *uap)
2911{
2912	struct sigevent ev, *evp;
2913	struct sigevent32 ev32;
2914	int error;
2915
2916	if (uap->sigev == NULL) {
2917		evp = NULL;
2918	} else {
2919		error = copyin(uap->sigev, &ev32, sizeof(ev32));
2920		if (error != 0)
2921			return (error);
2922		error = convert_sigevent32(&ev32, &ev);
2923		if (error != 0)
2924			return (error);
2925		evp = &ev;
2926	}
2927	return (kern_kmq_notify(td, uap->mqd, evp));
2928}
2929
2930static struct syscall_helper_data mq32_syscalls[] = {
2931	SYSCALL32_INIT_HELPER(freebsd32_kmq_open),
2932	SYSCALL32_INIT_HELPER(freebsd32_kmq_setattr),
2933	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedsend),
2934	SYSCALL32_INIT_HELPER(freebsd32_kmq_timedreceive),
2935	SYSCALL32_INIT_HELPER(freebsd32_kmq_notify),
2936	SYSCALL32_INIT_HELPER_COMPAT(kmq_unlink),
2937	SYSCALL_INIT_LAST
2938};
2939#endif
2940
2941static int
2942mqinit(void)
2943{
2944	int error;
2945
2946	error = syscall_helper_register(mq_syscalls);
2947	if (error != 0)
2948		return (error);
2949#ifdef COMPAT_FREEBSD32
2950	error = syscall32_helper_register(mq32_syscalls);
2951	if (error != 0)
2952		return (error);
2953#endif
2954	return (0);
2955}
2956
2957static int
2958mqunload(void)
2959{
2960
2961#ifdef COMPAT_FREEBSD32
2962	syscall32_helper_unregister(mq32_syscalls);
2963#endif
2964	syscall_helper_unregister(mq_syscalls);
2965	return (0);
2966}
2967
2968static int
2969mq_modload(struct module *module, int cmd, void *arg)
2970{
2971	int error = 0;
2972
2973	error = vfs_modevent(module, cmd, arg);
2974	if (error != 0)
2975		return (error);
2976
2977	switch (cmd) {
2978	case MOD_LOAD:
2979		error = mqinit();
2980		if (error != 0)
2981			mqunload();
2982		break;
2983	case MOD_UNLOAD:
2984		error = mqunload();
2985		break;
2986	default:
2987		break;
2988	}
2989	return (error);
2990}
2991
2992static moduledata_t mqueuefs_mod = {
2993	"mqueuefs",
2994	mq_modload,
2995	&mqueuefs_vfsconf
2996};
2997DECLARE_MODULE(mqueuefs, mqueuefs_mod, SI_SUB_VFS, SI_ORDER_MIDDLE);
2998MODULE_VERSION(mqueuefs, 1);
2999