1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: releng/11.0/sys/kern/sysv_msg.c 301737 2016-06-09 15:34:33Z jamie $");
52
53#include "opt_compat.h"
54#include "opt_sysvipc.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/priv.h>
61#include <sys/proc.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/mount.h>
66#include <sys/msg.h>
67#include <sys/racct.h>
68#include <sys/sx.h>
69#include <sys/syscall.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysent.h>
72#include <sys/sysctl.h>
73#include <sys/malloc.h>
74#include <sys/jail.h>
75
76#include <security/mac/mac_framework.h>
77
78FEATURE(sysv_msg, "System V message queues support");
79
80static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
81
82static int msginit(void);
83static int msgunload(void);
84static int sysvmsg_modload(struct module *, int, void *);
85static void msq_remove(struct msqid_kernel *);
86static struct prison *msg_find_prison(struct ucred *);
87static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
88static int msg_prison_check(void *, void *);
89static int msg_prison_set(void *, void *);
90static int msg_prison_get(void *, void *);
91static int msg_prison_remove(void *, void *);
92static void msg_prison_cleanup(struct prison *);
93
94
/*
 * Debug tracing.  Callers write DPRINTF(("fmt", args...)) with a doubled
 * set of parentheses: the inner pair is the printf argument list.  With
 * MSG_DEBUG defined the macro expands to a printf call; otherwise it
 * expands to a no-op expression and the arguments are never evaluated.
 */
#ifdef MSG_DEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)	(void)0
#endif
100
101static void msg_freehdr(struct msg *msghdr);
102
/*
 * Compile-time defaults for the message limits.  Each value guarded by
 * #ifndef can be overridden by defining the macro before this point;
 * MSGMAX is always derived from MSGSSZ and MSGSEG.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must be less than 32767 */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* bytes in the whole segment pool */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
119
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit() in this file checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
 */
/*
 * Run-time limits, initialised from the compile-time defaults.  msginit()
 * recomputes the first field (msgmax) as msgseg * msgssz before allocating.
 */
struct msginfo msginfo = {
                MSGMAX,         /* max chars in a message */
                MSGMNI,         /* # of message queue identifiers */
                MSGMNB,         /* max chars in a queue */
                MSGTQL,         /* max messages in system */
                MSGSSZ,         /* size of a message segment */
                		/* (must be small power of 2 greater than 4) */
                MSGSEG          /* number of message segments */
};
139
/*
 * Macros to convert between a queue table index plus msqid_ds and the
 * msqid handed to callers (specific to this implementation).
 *
 * A msqid carries the table index in its low 16 bits and the queue's
 * msg_perm.seq in its high 16 bits.  Each sub-expression is parenthesised
 * explicitly so the result does not rely on the relative precedence of
 * '&' and '|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
147
148/*
149 * The rest of this file is specific to this particular implementation.
150 */
151
/*
 * One entry per message segment.  Entries are chained through `next`,
 * either on the free list headed by free_msgmaps or on the segment list of
 * one message, headed by msg_spot in its header.  A chain ends with -1.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
    				/* -1 -> available */
    				/* 0..(MSGSEG-1) -> index of next segment */
};
157
/*
 * Flag kept in msg_perm.mode, outside the 0777 permission bits.  msgsnd
 * sets it while it sleeps waiting for resources and while it copies a
 * message in with msq_mtx dropped; msgget will not reuse a slot that has
 * it set.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* File-scope state; allocated and initialised in msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
static struct msg *msghdrs;	/* MSGTQL msg headers */
static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
static struct mtx msq_mtx;	/* global mutex for message queues. */
static unsigned msg_prison_slot;/* prison OSD slot */
169
/*
 * Native system call entries.  msginit() registers this table and
 * msgunload() unregisters it.  The msgsys and freebsd7_msgctl entries are
 * present only when one of the COMPAT_FREEBSD4..7 options is configured.
 */
static struct syscall_helper_data msg_syscalls[] = {
	SYSCALL_INIT_HELPER(msgctl),
	SYSCALL_INIT_HELPER(msgget),
	SYSCALL_INIT_HELPER(msgsnd),
	SYSCALL_INIT_HELPER(msgrcv),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL_INIT_HELPER(msgsys),
	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
#endif
	SYSCALL_INIT_LAST
};
182
183#ifdef COMPAT_FREEBSD32
184#include <compat/freebsd32/freebsd32.h>
185#include <compat/freebsd32/freebsd32_ipc.h>
186#include <compat/freebsd32/freebsd32_proto.h>
187#include <compat/freebsd32/freebsd32_signal.h>
188#include <compat/freebsd32/freebsd32_syscall.h>
189#include <compat/freebsd32/freebsd32_util.h>
190
/*
 * 32-bit compatibility system call entries.  msginit() registers this
 * table and msgunload() unregisters it, both under COMPAT_FREEBSD32.
 */
static struct syscall_helper_data msg32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
	SYSCALL32_INIT_HELPER_COMPAT(msgget),
	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
#endif
	SYSCALL_INIT_LAST
};
203#endif
204
205static int
206msginit()
207{
208	struct prison *pr;
209	void **rsv;
210	int i, error;
211	osd_method_t methods[PR_MAXMETHOD] = {
212	    [PR_METHOD_CHECK] =		msg_prison_check,
213	    [PR_METHOD_SET] =		msg_prison_set,
214	    [PR_METHOD_GET] =		msg_prison_get,
215	    [PR_METHOD_REMOVE] =	msg_prison_remove,
216	};
217
218	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
219	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
220	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
221	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
222	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
223	    M_WAITOK);
224
225	/*
226	 * msginfo.msgssz should be a power of two for efficiency reasons.
227	 * It is also pretty silly if msginfo.msgssz is less than 8
228	 * or greater than about 256 so ...
229	 */
230
231	i = 8;
232	while (i < 1024 && i != msginfo.msgssz)
233		i <<= 1;
234    	if (i != msginfo.msgssz) {
235		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
236		    msginfo.msgssz));
237		panic("msginfo.msgssz not a small power of 2");
238	}
239
240	if (msginfo.msgseg > 32767) {
241		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
242		panic("msginfo.msgseg > 32767");
243	}
244
245	for (i = 0; i < msginfo.msgseg; i++) {
246		if (i > 0)
247			msgmaps[i-1].next = i;
248		msgmaps[i].next = -1;	/* implies entry is available */
249	}
250	free_msgmaps = 0;
251	nfree_msgmaps = msginfo.msgseg;
252
253	for (i = 0; i < msginfo.msgtql; i++) {
254		msghdrs[i].msg_type = 0;
255		if (i > 0)
256			msghdrs[i-1].msg_next = &msghdrs[i];
257		msghdrs[i].msg_next = NULL;
258#ifdef MAC
259		mac_sysvmsg_init(&msghdrs[i]);
260#endif
261    	}
262	free_msghdrs = &msghdrs[0];
263
264	for (i = 0; i < msginfo.msgmni; i++) {
265		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
266		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
267		msqids[i].u.msg_perm.mode = 0;
268#ifdef MAC
269		mac_sysvmsq_init(&msqids[i]);
270#endif
271	}
272	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
273
274	/* Set current prisons according to their allow.sysvipc. */
275	msg_prison_slot = osd_jail_register(NULL, methods);
276	rsv = osd_reserve(msg_prison_slot);
277	prison_lock(&prison0);
278	(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
279	prison_unlock(&prison0);
280	rsv = NULL;
281	sx_slock(&allprison_lock);
282	TAILQ_FOREACH(pr, &allprison, pr_list) {
283		if (rsv == NULL)
284			rsv = osd_reserve(msg_prison_slot);
285		prison_lock(pr);
286		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
287			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
288			    &prison0);
289			rsv = NULL;
290		}
291		prison_unlock(pr);
292	}
293	if (rsv != NULL)
294		osd_free_reserved(rsv);
295	sx_sunlock(&allprison_lock);
296
297	error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD);
298	if (error != 0)
299		return (error);
300#ifdef COMPAT_FREEBSD32
301	error = syscall32_helper_register(msg32_syscalls, SY_THR_STATIC_KLD);
302	if (error != 0)
303		return (error);
304#endif
305	return (0);
306}
307
308static int
309msgunload()
310{
311	struct msqid_kernel *msqkptr;
312	int msqid;
313#ifdef MAC
314	int i;
315#endif
316
317	syscall_helper_unregister(msg_syscalls);
318#ifdef COMPAT_FREEBSD32
319	syscall32_helper_unregister(msg32_syscalls);
320#endif
321
322	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
323		msqkptr = &msqids[msqid];
324		if (msqkptr->u.msg_qbytes != 0 ||
325		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
326			break;
327	}
328	if (msqid != msginfo.msgmni)
329		return (EBUSY);
330
331	if (msg_prison_slot != 0)
332		osd_jail_deregister(msg_prison_slot);
333#ifdef MAC
334	for (i = 0; i < msginfo.msgtql; i++)
335		mac_sysvmsg_destroy(&msghdrs[i]);
336	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
337		mac_sysvmsq_destroy(&msqids[msqid]);
338#endif
339	free(msgpool, M_MSG);
340	free(msgmaps, M_MSG);
341	free(msghdrs, M_MSG);
342	free(msqids, M_MSG);
343	mtx_destroy(&msq_mtx);
344	return (0);
345}
346
347
348static int
349sysvmsg_modload(struct module *module, int cmd, void *arg)
350{
351	int error = 0;
352
353	switch (cmd) {
354	case MOD_LOAD:
355		error = msginit();
356		if (error != 0)
357			msgunload();
358		break;
359	case MOD_UNLOAD:
360		error = msgunload();
361		break;
362	case MOD_SHUTDOWN:
363		break;
364	default:
365		error = EINVAL;
366		break;
367	}
368	return (error);
369}
370
/*
 * Module glue: sysvmsg_modload() is the handler recorded in the module
 * data, so it receives the load, unload and shutdown events.
 */
static moduledata_t sysvmsg_mod = {
	"sysvmsg",
	&sysvmsg_modload,
	NULL
};

DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
MODULE_VERSION(sysvmsg, 1);
379
380static void
381msg_freehdr(msghdr)
382	struct msg *msghdr;
383{
384	while (msghdr->msg_ts > 0) {
385		short next;
386		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
387			panic("msghdr->msg_spot out of range");
388		next = msgmaps[msghdr->msg_spot].next;
389		msgmaps[msghdr->msg_spot].next = free_msgmaps;
390		free_msgmaps = msghdr->msg_spot;
391		nfree_msgmaps++;
392		msghdr->msg_spot = next;
393		if (msghdr->msg_ts >= msginfo.msgssz)
394			msghdr->msg_ts -= msginfo.msgssz;
395		else
396			msghdr->msg_ts = 0;
397	}
398	if (msghdr->msg_spot != -1)
399		panic("msghdr->msg_spot != -1");
400	msghdr->msg_next = free_msghdrs;
401	free_msghdrs = msghdr;
402#ifdef MAC
403	mac_sysvmsg_cleanup(msghdr);
404#endif
405}
406
407static void
408msq_remove(struct msqid_kernel *msqkptr)
409{
410	struct msg *msghdr;
411
412	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
413	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
414	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
415	crfree(msqkptr->cred);
416	msqkptr->cred = NULL;
417
418	/* Free the message headers */
419	msghdr = msqkptr->u.msg_first;
420	while (msghdr != NULL) {
421		struct msg *msghdr_tmp;
422
423		/* Free the segments of each message */
424		msqkptr->u.msg_cbytes -= msghdr->msg_ts;
425		msqkptr->u.msg_qnum--;
426		msghdr_tmp = msghdr;
427		msghdr = msghdr->msg_next;
428		msg_freehdr(msghdr_tmp);
429	}
430
431	if (msqkptr->u.msg_cbytes != 0)
432		panic("msg_cbytes is screwed up");
433	if (msqkptr->u.msg_qnum != 0)
434		panic("msg_qnum is screwed up");
435
436	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
437
438#ifdef MAC
439	mac_sysvmsq_cleanup(msqkptr);
440#endif
441
442	wakeup(msqkptr);
443}
444
445static struct prison *
446msg_find_prison(struct ucred *cred)
447{
448	struct prison *pr, *rpr;
449
450	pr = cred->cr_prison;
451	prison_lock(pr);
452	rpr = osd_jail_get(pr, msg_prison_slot);
453	prison_unlock(pr);
454	return rpr;
455}
456
457static int
458msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
459{
460
461	if (msqkptr->cred == NULL ||
462	    !(rpr == msqkptr->cred->cr_prison ||
463	      prison_ischild(rpr, msqkptr->cred->cr_prison)))
464		return (EINVAL);
465	return (0);
466}
467
468#ifndef _SYS_SYSPROTO_H_
469struct msgctl_args {
470	int	msqid;
471	int	cmd;
472	struct	msqid_ds *buf;
473};
474#endif
475int
476sys_msgctl(td, uap)
477	struct thread *td;
478	register struct msgctl_args *uap;
479{
480	int msqid = uap->msqid;
481	int cmd = uap->cmd;
482	struct msqid_ds msqbuf;
483	int error;
484
485	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
486	if (cmd == IPC_SET &&
487	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
488		return (error);
489	error = kern_msgctl(td, msqid, cmd, &msqbuf);
490	if (cmd == IPC_STAT && error == 0)
491		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
492	return (error);
493}
494
495int
496kern_msgctl(td, msqid, cmd, msqbuf)
497	struct thread *td;
498	int msqid;
499	int cmd;
500	struct msqid_ds *msqbuf;
501{
502	int rval, error, msqix;
503	register struct msqid_kernel *msqkptr;
504	struct prison *rpr;
505
506	rpr = msg_find_prison(td->td_ucred);
507	if (rpr == NULL)
508		return (ENOSYS);
509
510	msqix = IPCID_TO_IX(msqid);
511
512	if (msqix < 0 || msqix >= msginfo.msgmni) {
513		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
514		    msginfo.msgmni));
515		return (EINVAL);
516	}
517
518	msqkptr = &msqids[msqix];
519
520	mtx_lock(&msq_mtx);
521	if (msqkptr->u.msg_qbytes == 0) {
522		DPRINTF(("no such msqid\n"));
523		error = EINVAL;
524		goto done2;
525	}
526	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
527		DPRINTF(("wrong sequence number\n"));
528		error = EINVAL;
529		goto done2;
530	}
531
532	error = msq_prison_cansee(rpr, msqkptr);
533	if (error != 0) {
534		DPRINTF(("requester can't see prison\n"));
535		goto done2;
536	}
537
538#ifdef MAC
539	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
540	if (error != 0)
541		goto done2;
542#endif
543
544	error = 0;
545	rval = 0;
546
547	switch (cmd) {
548
549	case IPC_RMID:
550	{
551#ifdef MAC
552		struct msg *msghdr;
553#endif
554		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
555			goto done2;
556
557#ifdef MAC
558		/*
559		 * Check that the thread has MAC access permissions to
560		 * individual msghdrs.  Note: We need to do this in a
561		 * separate loop because the actual loop alters the
562		 * msq/msghdr info as it progresses, and there is no going
563		 * back if half the way through we discover that the
564		 * thread cannot free a certain msghdr.  The msq will get
565		 * into an inconsistent state.
566		 */
567		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
568		    msghdr = msghdr->msg_next) {
569			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
570			if (error != 0)
571				goto done2;
572		}
573#endif
574
575		msq_remove(msqkptr);
576	}
577
578		break;
579
580	case IPC_SET:
581		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
582			goto done2;
583		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
584			error = priv_check(td, PRIV_IPC_MSGSIZE);
585			if (error)
586				goto done2;
587		}
588		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
589			DPRINTF(("can't increase msg_qbytes beyond %d"
590			    "(truncating)\n", msginfo.msgmnb));
591			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
592		}
593		if (msqbuf->msg_qbytes == 0) {
594			DPRINTF(("can't reduce msg_qbytes to 0\n"));
595			error = EINVAL;		/* non-standard errno! */
596			goto done2;
597		}
598		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
599		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
600		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
601		    (msqbuf->msg_perm.mode & 0777);
602		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
603		msqkptr->u.msg_ctime = time_second;
604		break;
605
606	case IPC_STAT:
607		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
608			DPRINTF(("requester doesn't have read access\n"));
609			goto done2;
610		}
611		*msqbuf = msqkptr->u;
612		if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
613			msqbuf->msg_perm.key = IPC_PRIVATE;
614		break;
615
616	default:
617		DPRINTF(("invalid command %d\n", cmd));
618		error = EINVAL;
619		goto done2;
620	}
621
622	if (error == 0)
623		td->td_retval[0] = rval;
624done2:
625	mtx_unlock(&msq_mtx);
626	return (error);
627}
628
629#ifndef _SYS_SYSPROTO_H_
630struct msgget_args {
631	key_t	key;
632	int	msgflg;
633};
634#endif
635
636int
637sys_msgget(td, uap)
638	struct thread *td;
639	register struct msgget_args *uap;
640{
641	int msqid, error = 0;
642	int key = uap->key;
643	int msgflg = uap->msgflg;
644	struct ucred *cred = td->td_ucred;
645	register struct msqid_kernel *msqkptr = NULL;
646
647	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
648
649	if (msg_find_prison(cred) == NULL)
650		return (ENOSYS);
651
652	mtx_lock(&msq_mtx);
653	if (key != IPC_PRIVATE) {
654		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
655			msqkptr = &msqids[msqid];
656			if (msqkptr->u.msg_qbytes != 0 &&
657			    msqkptr->cred != NULL &&
658			    msqkptr->cred->cr_prison == cred->cr_prison &&
659			    msqkptr->u.msg_perm.key == key)
660				break;
661		}
662		if (msqid < msginfo.msgmni) {
663			DPRINTF(("found public key\n"));
664			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
665				DPRINTF(("not exclusive\n"));
666				error = EEXIST;
667				goto done2;
668			}
669			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
670			    msgflg & 0700))) {
671				DPRINTF(("requester doesn't have 0%o access\n",
672				    msgflg & 0700));
673				goto done2;
674			}
675#ifdef MAC
676			error = mac_sysvmsq_check_msqget(cred, msqkptr);
677			if (error != 0)
678				goto done2;
679#endif
680			goto found;
681		}
682	}
683
684	DPRINTF(("need to allocate the msqid_ds\n"));
685	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
686		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
687			/*
688			 * Look for an unallocated and unlocked msqid_ds.
689			 * msqid_ds's can be locked by msgsnd or msgrcv while
690			 * they are copying the message in/out.  We can't
691			 * re-use the entry until they release it.
692			 */
693			msqkptr = &msqids[msqid];
694			if (msqkptr->u.msg_qbytes == 0 &&
695			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
696				break;
697		}
698		if (msqid == msginfo.msgmni) {
699			DPRINTF(("no more msqid_ds's available\n"));
700			error = ENOSPC;
701			goto done2;
702		}
703#ifdef RACCT
704		if (racct_enable) {
705			PROC_LOCK(td->td_proc);
706			error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
707			PROC_UNLOCK(td->td_proc);
708			if (error != 0) {
709				error = ENOSPC;
710				goto done2;
711			}
712		}
713#endif
714		DPRINTF(("msqid %d is available\n", msqid));
715		msqkptr->u.msg_perm.key = key;
716		msqkptr->u.msg_perm.cuid = cred->cr_uid;
717		msqkptr->u.msg_perm.uid = cred->cr_uid;
718		msqkptr->u.msg_perm.cgid = cred->cr_gid;
719		msqkptr->u.msg_perm.gid = cred->cr_gid;
720		msqkptr->u.msg_perm.mode = (msgflg & 0777);
721		msqkptr->cred = crhold(cred);
722		/* Make sure that the returned msqid is unique */
723		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
724		msqkptr->u.msg_first = NULL;
725		msqkptr->u.msg_last = NULL;
726		msqkptr->u.msg_cbytes = 0;
727		msqkptr->u.msg_qnum = 0;
728		msqkptr->u.msg_qbytes = msginfo.msgmnb;
729		msqkptr->u.msg_lspid = 0;
730		msqkptr->u.msg_lrpid = 0;
731		msqkptr->u.msg_stime = 0;
732		msqkptr->u.msg_rtime = 0;
733		msqkptr->u.msg_ctime = time_second;
734#ifdef MAC
735		mac_sysvmsq_create(cred, msqkptr);
736#endif
737	} else {
738		DPRINTF(("didn't find it and wasn't asked to create it\n"));
739		error = ENOENT;
740		goto done2;
741	}
742
743found:
744	/* Construct the unique msqid */
745	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
746done2:
747	mtx_unlock(&msq_mtx);
748	return (error);
749}
750
#ifndef _SYS_SYSPROTO_H_
struct msgsnd_args {
	int	msqid;
	const void	*msgp;
	size_t	msgsz;
	int	msgflg;
};
#endif
/*
 * Queue one message on a message queue on behalf of thread td.
 *
 *   td      calling thread
 *   msqid   queue identifier (slot index and sequence number combined)
 *   msgp    user-space address of the message text (read with copyin)
 *   msgsz   length of the message text in bytes
 *   msgflg  IPC_NOWAIT makes the call fail with EAGAIN instead of sleeping
 *   mtype   message type; must be at least 1
 *
 * Returns 0 on success and stores 0 in td->td_retval[0].  Errors seen in
 * this function: ENOSYS (msg_find_prison() yields NULL), EINVAL (bad id,
 * free slot, stale sequence number, invisible queue, msgsz larger than the
 * queue's msg_qbytes, mtype < 1), EAGAIN (racct limit, or resources are
 * short and IPC_NOWAIT is set), EINTR (sleep interrupted), EIDRM (queue
 * removed while waiting or copying), plus errors from ipcperm(), copyin()
 * and the MAC hooks.
 *
 * msq_mtx is held throughout except around each copyin() of the message
 * body; MSG_LOCKED is set on the queue for that window.
 */
int
kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
	struct thread *td;
	int msqid;
	const void *msgp;	/* XXX msgp is actually mtext. */
	size_t msgsz;
	int msgflg;
	long mtype;
{
	int msqix, segs_needed, error = 0;
	register struct msqid_kernel *msqkptr;
	register struct msg *msghdr;
	struct prison *rpr;
	short next;
#ifdef RACCT
	size_t saved_msgsz;	/* msgsz is consumed by the copy loop below */
#endif

	rpr = msg_find_prison(td->td_ucred);
	if (rpr == NULL)
		return (ENOSYS);

	mtx_lock(&msq_mtx);
	msqix = IPCID_TO_IX(msqid);

	if (msqix < 0 || msqix >= msginfo.msgmni) {
		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
		    msginfo.msgmni));
		error = EINVAL;
		goto done2;
	}

	msqkptr = &msqids[msqix];
	if (msqkptr->u.msg_qbytes == 0) {
		DPRINTF(("no such message queue id\n"));
		error = EINVAL;
		goto done2;
	}
	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
		DPRINTF(("wrong sequence number\n"));
		error = EINVAL;
		goto done2;
	}

	if ((error = msq_prison_cansee(rpr, msqkptr))) {
		DPRINTF(("requester can't see prison\n"));
		goto done2;
	}

	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
		DPRINTF(("requester doesn't have write access\n"));
		goto done2;
	}

#ifdef MAC
	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
	if (error != 0)
		goto done2;
#endif

#ifdef RACCT
	/*
	 * Charge the message to the process up front.  From here on every
	 * failure exits through done3, which gives both charges back.
	 */
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
			PROC_UNLOCK(td->td_proc);
			error = EAGAIN;
			goto done2;
		}
		saved_msgsz = msgsz;
		if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
			racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
			PROC_UNLOCK(td->td_proc);
			error = EAGAIN;
			goto done2;
		}
		PROC_UNLOCK(td->td_proc);
	}
#endif

	segs_needed = howmany(msgsz, msginfo.msgssz);
	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
	    msginfo.msgssz, segs_needed));
	/* Loop until the queue is unlocked and has room for the message. */
	for (;;) {
		int need_more_resources = 0;

		/*
		 * check msgsz
		 * (inside this loop in case msg_qbytes changes while we sleep)
		 */

		if (msgsz > msqkptr->u.msg_qbytes) {
			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
			error = EINVAL;
			goto done3;
		}

		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
			DPRINTF(("msqid is locked\n"));
			need_more_resources = 1;
		}
		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
			need_more_resources = 1;
		}
		if (segs_needed > nfree_msgmaps) {
			DPRINTF(("segs_needed > nfree_msgmaps\n"));
			need_more_resources = 1;
		}
		if (free_msghdrs == NULL) {
			DPRINTF(("no more msghdrs\n"));
			need_more_resources = 1;
		}

		if (need_more_resources) {
			int we_own_it;

			if ((msgflg & IPC_NOWAIT) != 0) {
				DPRINTF(("need more resources but caller "
				    "doesn't want to wait\n"));
				error = EAGAIN;
				goto done3;
			}

			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
				DPRINTF(("we don't own the msqid_ds\n"));
				we_own_it = 0;
			} else {
				/* Force later arrivals to wait for our
				   request */
				DPRINTF(("we own the msqid_ds\n"));
				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
				we_own_it = 1;
			}
			DPRINTF(("msgsnd:  goodnight\n"));
			/* Sleep with a timeout of hz so the state is re-checked. */
			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
			    "msgsnd", hz);
			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
			if (we_own_it)
				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
			/*
			 * NOTE(review): the timeout path goes back to the top
			 * of the loop without the msg_qbytes == 0 check below.
			 * If the queue was removed meanwhile, the msgsz check
			 * at the top reports EINVAL rather than EIDRM for a
			 * non-empty message - confirm this is acceptable.
			 */
			if (error == EWOULDBLOCK) {
				DPRINTF(("msgsnd:  timed out\n"));
				continue;
			}
			if (error != 0) {
				DPRINTF(("msgsnd:  interrupted system call\n"));
				error = EINTR;
				goto done3;
			}

			/*
			 * Make sure that the msq queue still exists
			 */

			if (msqkptr->u.msg_qbytes == 0) {
				DPRINTF(("msqid deleted\n"));
				error = EIDRM;
				goto done3;
			}

		} else {
			DPRINTF(("got all the resources that we need\n"));
			break;
		}
	}

	/*
	 * We have the resources that we need.
	 * Make sure!
	 */

	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
		panic("msg_perm.mode & MSG_LOCKED");
	if (segs_needed > nfree_msgmaps)
		panic("segs_needed > nfree_msgmaps");
	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
		panic("msgsz + msg_cbytes > msg_qbytes");
	if (free_msghdrs == NULL)
		panic("no more msghdrs");

	/*
	 * Re-lock the msqid_ds in case we page-fault when copying in the
	 * message
	 */

	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
		panic("msqid_ds is already locked");
	msqkptr->u.msg_perm.mode |= MSG_LOCKED;

	/*
	 * Allocate a message header
	 */

	msghdr = free_msghdrs;
	free_msghdrs = msghdr->msg_next;
	msghdr->msg_spot = -1;
	msghdr->msg_ts = msgsz;
	msghdr->msg_type = mtype;
#ifdef MAC
	/*
	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
	 * immediately?  Or, should it be checked just before the msg is
	 * enqueued in the msgq (as it is done now)?
	 */
	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
#endif

	/*
	 * Allocate space for the message
	 */

	/*
	 * Each segment taken from the free list is pushed on the front of
	 * the message's own chain, headed by msg_spot.
	 */
	while (segs_needed > 0) {
		if (nfree_msgmaps <= 0)
			panic("not enough msgmaps");
		if (free_msgmaps == -1)
			panic("nil free_msgmaps");
		next = free_msgmaps;
		if (next <= -1)
			panic("next too low #1");
		if (next >= msginfo.msgseg)
			panic("next out of range #1");
		DPRINTF(("allocating segment %d to message\n", next));
		free_msgmaps = msgmaps[next].next;
		nfree_msgmaps--;
		msgmaps[next].next = msghdr->msg_spot;
		msghdr->msg_spot = next;
		segs_needed--;
	}

	/*
	 * Validate the message type
	 */

	/* Done only now, so the header and segments must be given back. */
	if (msghdr->msg_type < 1) {
		msg_freehdr(msghdr);
		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
		wakeup(msqkptr);
		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
		error = EINVAL;
		goto done3;
	}

	/*
	 * Copy in the message body
	 */

	/*
	 * One segment per iteration.  msq_mtx is dropped around copyin()
	 * and retaken afterwards on both the success and the failure path.
	 */
	next = msghdr->msg_spot;
	while (msgsz > 0) {
		size_t tlen;
		if (msgsz > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz;
		if (next <= -1)
			panic("next too low #2");
		if (next >= msginfo.msgseg)
			panic("next out of range #2");
		mtx_unlock(&msq_mtx);
		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
		    tlen)) != 0) {
			mtx_lock(&msq_mtx);
			DPRINTF(("error %d copying in message segment\n",
			    error));
			msg_freehdr(msghdr);
			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
			wakeup(msqkptr);
			goto done3;
		}
		mtx_lock(&msq_mtx);
		msgsz -= tlen;
		msgp = (const char *)msgp + tlen;
		next = msgmaps[next].next;
	}
	if (next != -1)
		panic("didn't use all the msg segments");

	/*
	 * We've got the message.  Unlock the msqid_ds.
	 */

	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;

	/*
	 * Make sure that the msqid_ds is still allocated.
	 */

	if (msqkptr->u.msg_qbytes == 0) {
		msg_freehdr(msghdr);
		wakeup(msqkptr);
		error = EIDRM;
		goto done3;
	}

#ifdef MAC
	/*
	 * Note: Since the task/thread allocates the msghdr and usually
	 * primes it with its own MAC label, for a majority of policies, it
	 * won't be necessary to check whether the msghdr has access
	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
	 * suffice in that case.  However, this hook may be required where
	 * individual policies derive a non-identical label for the msghdr
	 * from the current thread label and may want to check the msghdr
	 * enqueue permissions, along with read/write permissions to the
	 * msgq.
	 */
	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
	if (error != 0) {
		msg_freehdr(msghdr);
		wakeup(msqkptr);
		goto done3;
	}
#endif

	/*
	 * Put the message into the queue
	 */
	if (msqkptr->u.msg_first == NULL) {
		msqkptr->u.msg_first = msghdr;
		msqkptr->u.msg_last = msghdr;
	} else {
		msqkptr->u.msg_last->msg_next = msghdr;
		msqkptr->u.msg_last = msghdr;
	}
	msqkptr->u.msg_last->msg_next = NULL;

	msqkptr->u.msg_cbytes += msghdr->msg_ts;
	msqkptr->u.msg_qnum++;
	msqkptr->u.msg_lspid = td->td_proc->p_pid;
	msqkptr->u.msg_stime = time_second;

	/* Wake threads sleeping on this queue. */
	wakeup(msqkptr);
	td->td_retval[0] = 0;
done3:
	/* Exit taken once the racct charges exist: undo them on failure. */
#ifdef RACCT
	if (racct_enable && error != 0) {
		PROC_LOCK(td->td_proc);
		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
		PROC_UNLOCK(td->td_proc);
	}
#endif
done2:
	/* Exit for failures before the racct charges; only drops the mutex. */
	mtx_unlock(&msq_mtx);
	return (error);
}
1103
1104int
1105sys_msgsnd(td, uap)
1106	struct thread *td;
1107	register struct msgsnd_args *uap;
1108{
1109	int error;
1110	long mtype;
1111
1112	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
1113	    uap->msgsz, uap->msgflg));
1114
1115	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
1116		DPRINTF(("error %d copying the message type\n", error));
1117		return (error);
1118	}
1119	return (kern_msgsnd(td, uap->msqid,
1120	    (const char *)uap->msgp + sizeof(mtype),
1121	    uap->msgsz, uap->msgflg, mtype));
1122}
1123
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument structure consumed by sys_msgrcv().  Only declared here when
 * _SYS_SYSPROTO_H_ is not defined.
 */
struct msgrcv_args {
	int	msqid;		/* message queue identifier */
	void	*msgp;		/* user buffer: long type followed by the text */
	size_t	msgsz;		/* room available for the text, in bytes */
	long	msgtyp;		/* requested type: 0, positive or negative */
	int	msgflg;		/* flags; IPC_NOWAIT and MSG_NOERROR are tested */
};
#endif
1133int
1134kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
1135	struct thread *td;
1136	int msqid;
1137	void *msgp;	/* XXX msgp is actually mtext. */
1138	size_t msgsz;
1139	long msgtyp;
1140	int msgflg;
1141	long *mtype;
1142{
1143	size_t len;
1144	register struct msqid_kernel *msqkptr;
1145	register struct msg *msghdr;
1146	struct prison *rpr;
1147	int msqix, error = 0;
1148	short next;
1149
1150	rpr = msg_find_prison(td->td_ucred);
1151	if (rpr == NULL)
1152		return (ENOSYS);
1153
1154	msqix = IPCID_TO_IX(msqid);
1155
1156	if (msqix < 0 || msqix >= msginfo.msgmni) {
1157		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1158		    msginfo.msgmni));
1159		return (EINVAL);
1160	}
1161
1162	msqkptr = &msqids[msqix];
1163	mtx_lock(&msq_mtx);
1164	if (msqkptr->u.msg_qbytes == 0) {
1165		DPRINTF(("no such message queue id\n"));
1166		error = EINVAL;
1167		goto done2;
1168	}
1169	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1170		DPRINTF(("wrong sequence number\n"));
1171		error = EINVAL;
1172		goto done2;
1173	}
1174
1175	if ((error = msq_prison_cansee(rpr, msqkptr))) {
1176		DPRINTF(("requester can't see prison\n"));
1177		goto done2;
1178	}
1179
1180	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1181		DPRINTF(("requester doesn't have read access\n"));
1182		goto done2;
1183	}
1184
1185#ifdef MAC
1186	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
1187	if (error != 0)
1188		goto done2;
1189#endif
1190
1191	msghdr = NULL;
1192	while (msghdr == NULL) {
1193		if (msgtyp == 0) {
1194			msghdr = msqkptr->u.msg_first;
1195			if (msghdr != NULL) {
1196				if (msgsz < msghdr->msg_ts &&
1197				    (msgflg & MSG_NOERROR) == 0) {
1198					DPRINTF(("first message on the queue "
1199					    "is too big (want %zu, got %d)\n",
1200					    msgsz, msghdr->msg_ts));
1201					error = E2BIG;
1202					goto done2;
1203				}
1204#ifdef MAC
1205				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
1206				    msghdr);
1207				if (error != 0)
1208					goto done2;
1209#endif
1210				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1211					msqkptr->u.msg_first = NULL;
1212					msqkptr->u.msg_last = NULL;
1213				} else {
1214					msqkptr->u.msg_first = msghdr->msg_next;
1215					if (msqkptr->u.msg_first == NULL)
1216						panic("msg_first/last screwed up #1");
1217				}
1218			}
1219		} else {
1220			struct msg *previous;
1221			struct msg **prev;
1222
1223			previous = NULL;
1224			prev = &(msqkptr->u.msg_first);
1225			while ((msghdr = *prev) != NULL) {
1226				/*
1227				 * Is this message's type an exact match or is
1228				 * this message's type less than or equal to
1229				 * the absolute value of a negative msgtyp?
1230				 * Note that the second half of this test can
1231				 * NEVER be true if msgtyp is positive since
1232				 * msg_type is always positive!
1233				 */
1234
1235				if (msgtyp == msghdr->msg_type ||
1236				    msghdr->msg_type <= -msgtyp) {
1237					DPRINTF(("found message type %ld, "
1238					    "requested %ld\n",
1239					    msghdr->msg_type, msgtyp));
1240					if (msgsz < msghdr->msg_ts &&
1241					    (msgflg & MSG_NOERROR) == 0) {
1242						DPRINTF(("requested message "
1243						    "on the queue is too big "
1244						    "(want %zu, got %hu)\n",
1245						    msgsz, msghdr->msg_ts));
1246						error = E2BIG;
1247						goto done2;
1248					}
1249#ifdef MAC
1250					error = mac_sysvmsq_check_msgrcv(
1251					    td->td_ucred, msghdr);
1252					if (error != 0)
1253						goto done2;
1254#endif
1255					*prev = msghdr->msg_next;
1256					if (msghdr == msqkptr->u.msg_last) {
1257						if (previous == NULL) {
1258							if (prev !=
1259							    &msqkptr->u.msg_first)
1260								panic("msg_first/last screwed up #2");
1261							msqkptr->u.msg_first =
1262							    NULL;
1263							msqkptr->u.msg_last =
1264							    NULL;
1265						} else {
1266							if (prev ==
1267							    &msqkptr->u.msg_first)
1268								panic("msg_first/last screwed up #3");
1269							msqkptr->u.msg_last =
1270							    previous;
1271						}
1272					}
1273					break;
1274				}
1275				previous = msghdr;
1276				prev = &(msghdr->msg_next);
1277			}
1278		}
1279
1280		/*
1281		 * We've either extracted the msghdr for the appropriate
1282		 * message or there isn't one.
1283		 * If there is one then bail out of this loop.
1284		 */
1285
1286		if (msghdr != NULL)
1287			break;
1288
1289		/*
1290		 * Hmph!  No message found.  Does the user want to wait?
1291		 */
1292
1293		if ((msgflg & IPC_NOWAIT) != 0) {
1294			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1295			    msgtyp));
1296			/* The SVID says to return ENOMSG. */
1297			error = ENOMSG;
1298			goto done2;
1299		}
1300
1301		/*
1302		 * Wait for something to happen
1303		 */
1304
1305		DPRINTF(("msgrcv:  goodnight\n"));
1306		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1307		    "msgrcv", 0);
1308		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1309
1310		if (error != 0) {
1311			DPRINTF(("msgrcv:  interrupted system call\n"));
1312			error = EINTR;
1313			goto done2;
1314		}
1315
1316		/*
1317		 * Make sure that the msq queue still exists
1318		 */
1319
1320		if (msqkptr->u.msg_qbytes == 0 ||
1321		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1322			DPRINTF(("msqid deleted\n"));
1323			error = EIDRM;
1324			goto done2;
1325		}
1326	}
1327
1328	/*
1329	 * Return the message to the user.
1330	 *
1331	 * First, do the bookkeeping (before we risk being interrupted).
1332	 */
1333
1334	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1335	msqkptr->u.msg_qnum--;
1336	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1337	msqkptr->u.msg_rtime = time_second;
1338
1339	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
1340	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);
1341
1342	/*
1343	 * Make msgsz the actual amount that we'll be returning.
1344	 * Note that this effectively truncates the message if it is too long
1345	 * (since msgsz is never increased).
1346	 */
1347
1348	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1349	    msghdr->msg_ts));
1350	if (msgsz > msghdr->msg_ts)
1351		msgsz = msghdr->msg_ts;
1352	*mtype = msghdr->msg_type;
1353
1354	/*
1355	 * Return the segments to the user
1356	 */
1357
1358	next = msghdr->msg_spot;
1359	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1360		size_t tlen;
1361
1362		if (msgsz - len > msginfo.msgssz)
1363			tlen = msginfo.msgssz;
1364		else
1365			tlen = msgsz - len;
1366		if (next <= -1)
1367			panic("next too low #3");
1368		if (next >= msginfo.msgseg)
1369			panic("next out of range #3");
1370		mtx_unlock(&msq_mtx);
1371		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1372		mtx_lock(&msq_mtx);
1373		if (error != 0) {
1374			DPRINTF(("error (%d) copying out message segment\n",
1375			    error));
1376			msg_freehdr(msghdr);
1377			wakeup(msqkptr);
1378			goto done2;
1379		}
1380		msgp = (char *)msgp + tlen;
1381		next = msgmaps[next].next;
1382	}
1383
1384	/*
1385	 * Done, return the actual number of bytes copied out.
1386	 */
1387
1388	msg_freehdr(msghdr);
1389	wakeup(msqkptr);
1390	td->td_retval[0] = msgsz;
1391done2:
1392	mtx_unlock(&msq_mtx);
1393	return (error);
1394}
1395
1396int
1397sys_msgrcv(td, uap)
1398	struct thread *td;
1399	register struct msgrcv_args *uap;
1400{
1401	int error;
1402	long mtype;
1403
1404	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1405	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1406
1407	if ((error = kern_msgrcv(td, uap->msqid,
1408	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1409	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1410		return (error);
1411	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1412		DPRINTF(("error %d copying the message type\n", error));
1413	return (error);
1414}
1415
1416static int
1417sysctl_msqids(SYSCTL_HANDLER_ARGS)
1418{
1419	struct msqid_kernel tmsqk;
1420	struct prison *pr, *rpr;
1421	int error, i;
1422
1423	pr = req->td->td_ucred->cr_prison;
1424	rpr = msg_find_prison(req->td->td_ucred);
1425	error = 0;
1426	for (i = 0; i < msginfo.msgmni; i++) {
1427		mtx_lock(&msq_mtx);
1428		if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
1429		    msq_prison_cansee(rpr, &msqids[i]) != 0)
1430			bzero(&tmsqk, sizeof(tmsqk));
1431		else {
1432			tmsqk = msqids[i];
1433			if (tmsqk.cred->cr_prison != pr)
1434				tmsqk.u.msg_perm.key = IPC_PRIVATE;
1435		}
1436		mtx_unlock(&msq_mtx);
1437		error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk));
1438		if (error != 0)
1439			break;
1440	}
1441	return (error);
1442}
1443
/*
 * Sysctl nodes declared under the _kern_ipc parent: one integer node per
 * msginfo limit, plus an opaque node whose contents are produced by
 * sysctl_msqids().
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
    "Maximum message size");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
    "Number of message queue identifiers");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
    "Maximum number of bytes in a queue");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
    "Maximum number of messages in the system");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
    "Size of a message segment");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
    "Number of message segments");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1459
1460static int
1461msg_prison_check(void *obj, void *data)
1462{
1463	struct prison *pr = obj;
1464	struct prison *prpr;
1465	struct vfsoptlist *opts = data;
1466	int error, jsys;
1467
1468	/*
1469	 * sysvmsg is a jailsys integer.
1470	 * It must be "disable" if the parent jail is disabled.
1471	 */
1472	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1473	if (error != ENOENT) {
1474		if (error != 0)
1475			return (error);
1476		switch (jsys) {
1477		case JAIL_SYS_DISABLE:
1478			break;
1479		case JAIL_SYS_NEW:
1480		case JAIL_SYS_INHERIT:
1481			prison_lock(pr->pr_parent);
1482			prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
1483			prison_unlock(pr->pr_parent);
1484			if (prpr == NULL)
1485				return (EPERM);
1486			break;
1487		default:
1488			return (EINVAL);
1489		}
1490	}
1491
1492	return (0);
1493}
1494
/*
 * Jail parameter "set" hook for "sysvmsg".
 *
 * Records, in the jail's msg_prison_slot OSD entry, which prison acts as the
 * root of this jail's message queues, and propagates the change to
 * descendant jails.  Local naming: orpr is the root previously recorded for
 * pr, nrpr the new root, and trpr the root recorded for the descendant tpr
 * currently being visited.
 *
 * Always returns 0.
 */
static int
msg_prison_set(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *tpr, *orpr, *nrpr, *trpr;
	struct vfsoptlist *opts = data;
	void *rsv;
	int jsys, descend;

	/*
	 * sysvmsg controls which jail is the root of the associated msgs (this
	 * jail or same as the parent), or if the feature is available at all.
	 *
	 * When sysvmsg itself is not given, fall back on the allow.sysvipc /
	 * allow.nosysvipc flags; -1 means "nothing requested, change nothing".
	 */
	if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
		    ? JAIL_SYS_INHERIT
		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
		    ? JAIL_SYS_DISABLE
		    : -1;
	if (jsys == JAIL_SYS_DISABLE) {
		/* Drop this jail's root entry, if it has one. */
		prison_lock(pr);
		orpr = osd_jail_get(pr, msg_prison_slot);
		if (orpr != NULL)
			osd_jail_del(pr, msg_prison_slot);
		prison_unlock(pr);
		if (orpr != NULL) {
			/* The jail was its own root: remove its queues. */
			if (orpr == pr)
				msg_prison_cleanup(pr);
			/* Disable all child jails as well. */
			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
				prison_lock(tpr);
				trpr = osd_jail_get(tpr, msg_prison_slot);
				if (trpr != NULL) {
					osd_jail_del(tpr, msg_prison_slot);
					prison_unlock(tpr);
					if (trpr == tpr)
						msg_prison_cleanup(tpr);
				} else {
					/*
					 * Already disabled; presumably clearing
					 * descend makes the iterator skip this
					 * jail's children -- TODO confirm
					 * against FOREACH_PRISON_DESCENDANT.
					 */
					prison_unlock(tpr);
					descend = 0;
				}
			}
		}
	} else if (jsys != -1) {
		/* "new": this jail is the root; otherwise use the parent's. */
		if (jsys == JAIL_SYS_NEW)
			nrpr = pr;
		else {
			prison_lock(pr->pr_parent);
			nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
			prison_unlock(pr->pr_parent);
		}
		/*
		 * The OSD reservation is obtained before the prison lock is
		 * taken and is released again if the root turns out not to
		 * change.
		 */
		rsv = osd_reserve(msg_prison_slot);
		prison_lock(pr);
		orpr = osd_jail_get(pr, msg_prison_slot);
		if (orpr != nrpr)
			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
			    nrpr);
		else
			osd_free_reserved(rsv);
		prison_unlock(pr);
		if (orpr != nrpr) {
			/* The jail stops being its own root: drop its queues. */
			if (orpr == pr)
				msg_prison_cleanup(pr);
			if (orpr != NULL) {
				/* Change child jails matching the old root. */
				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
					prison_lock(tpr);
					trpr = osd_jail_get(tpr,
					    msg_prison_slot);
					if (trpr == orpr) {
						(void)osd_jail_set(tpr,
						    msg_prison_slot, nrpr);
						prison_unlock(tpr);
						if (trpr == tpr)
							msg_prison_cleanup(tpr);
					} else {
						/*
						 * Different root: leave this
						 * jail alone (and, presumably,
						 * its children -- TODO confirm).
						 */
						prison_unlock(tpr);
						descend = 0;
					}
				}
			}
		}
	}

	return (0);
}
1581
1582static int
1583msg_prison_get(void *obj, void *data)
1584{
1585	struct prison *pr = obj;
1586	struct prison *rpr;
1587	struct vfsoptlist *opts = data;
1588	int error, jsys;
1589
1590	/* Set sysvmsg based on the jail's root prison. */
1591	prison_lock(pr);
1592	rpr = osd_jail_get(pr, msg_prison_slot);
1593	prison_unlock(pr);
1594	jsys = rpr == NULL ? JAIL_SYS_DISABLE
1595	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
1596	error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1597	if (error == ENOENT)
1598		error = 0;
1599	return (error);
1600}
1601
1602static int
1603msg_prison_remove(void *obj, void *data __unused)
1604{
1605	struct prison *pr = obj;
1606	struct prison *rpr;
1607
1608	prison_lock(pr);
1609	rpr = osd_jail_get(pr, msg_prison_slot);
1610	prison_unlock(pr);
1611	if (rpr == pr)
1612		msg_prison_cleanup(pr);
1613	return (0);
1614}
1615
1616static void
1617msg_prison_cleanup(struct prison *pr)
1618{
1619	struct msqid_kernel *msqkptr;
1620	int i;
1621
1622	/* Remove any msqs that belong to this jail. */
1623	mtx_lock(&msq_mtx);
1624	for (i = 0; i < msginfo.msgmni; i++) {
1625		msqkptr = &msqids[i];
1626		if (msqkptr->u.msg_qbytes != 0 &&
1627		    msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr)
1628			msq_remove(msqkptr);
1629	}
1630	mtx_unlock(&msq_mtx);
1631}
1632
/*
 * Declare the "sysvmsg" jail parameter; the option of the same name is read
 * and written by msg_prison_check(), msg_prison_set() and msg_prison_get().
 */
SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
1634
1635#ifdef COMPAT_FREEBSD32
1636int
1637freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
1638{
1639
1640#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1641    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1642	switch (uap->which) {
1643	case 0:
1644		return (freebsd7_freebsd32_msgctl(td,
1645		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
1646	case 2:
1647		return (freebsd32_msgsnd(td,
1648		    (struct freebsd32_msgsnd_args *)&uap->a2));
1649	case 3:
1650		return (freebsd32_msgrcv(td,
1651		    (struct freebsd32_msgrcv_args *)&uap->a2));
1652	default:
1653		return (sys_msgsys(td, (struct msgsys_args *)uap));
1654	}
1655#else
1656	return (nosys(td, NULL));
1657#endif
1658}
1659
1660#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1661    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1662int
1663freebsd7_freebsd32_msgctl(struct thread *td,
1664    struct freebsd7_freebsd32_msgctl_args *uap)
1665{
1666	struct msqid_ds msqbuf;
1667	struct msqid_ds32_old msqbuf32;
1668	int error;
1669
1670	if (uap->cmd == IPC_SET) {
1671		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1672		if (error)
1673			return (error);
1674		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1675		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1676		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1677		CP(msqbuf32, msqbuf, msg_cbytes);
1678		CP(msqbuf32, msqbuf, msg_qnum);
1679		CP(msqbuf32, msqbuf, msg_qbytes);
1680		CP(msqbuf32, msqbuf, msg_lspid);
1681		CP(msqbuf32, msqbuf, msg_lrpid);
1682		CP(msqbuf32, msqbuf, msg_stime);
1683		CP(msqbuf32, msqbuf, msg_rtime);
1684		CP(msqbuf32, msqbuf, msg_ctime);
1685	}
1686	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1687	if (error)
1688		return (error);
1689	if (uap->cmd == IPC_STAT) {
1690		bzero(&msqbuf32, sizeof(msqbuf32));
1691		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1692		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1693		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1694		CP(msqbuf, msqbuf32, msg_cbytes);
1695		CP(msqbuf, msqbuf32, msg_qnum);
1696		CP(msqbuf, msqbuf32, msg_qbytes);
1697		CP(msqbuf, msqbuf32, msg_lspid);
1698		CP(msqbuf, msqbuf32, msg_lrpid);
1699		CP(msqbuf, msqbuf32, msg_stime);
1700		CP(msqbuf, msqbuf32, msg_rtime);
1701		CP(msqbuf, msqbuf32, msg_ctime);
1702		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1703	}
1704	return (error);
1705}
1706#endif
1707
1708int
1709freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
1710{
1711	struct msqid_ds msqbuf;
1712	struct msqid_ds32 msqbuf32;
1713	int error;
1714
1715	if (uap->cmd == IPC_SET) {
1716		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1717		if (error)
1718			return (error);
1719		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1720		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1721		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1722		CP(msqbuf32, msqbuf, msg_cbytes);
1723		CP(msqbuf32, msqbuf, msg_qnum);
1724		CP(msqbuf32, msqbuf, msg_qbytes);
1725		CP(msqbuf32, msqbuf, msg_lspid);
1726		CP(msqbuf32, msqbuf, msg_lrpid);
1727		CP(msqbuf32, msqbuf, msg_stime);
1728		CP(msqbuf32, msqbuf, msg_rtime);
1729		CP(msqbuf32, msqbuf, msg_ctime);
1730	}
1731	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1732	if (error)
1733		return (error);
1734	if (uap->cmd == IPC_STAT) {
1735		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1736		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1737		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1738		CP(msqbuf, msqbuf32, msg_cbytes);
1739		CP(msqbuf, msqbuf32, msg_qnum);
1740		CP(msqbuf, msqbuf32, msg_qbytes);
1741		CP(msqbuf, msqbuf32, msg_lspid);
1742		CP(msqbuf, msqbuf32, msg_lrpid);
1743		CP(msqbuf, msqbuf32, msg_stime);
1744		CP(msqbuf, msqbuf32, msg_rtime);
1745		CP(msqbuf, msqbuf32, msg_ctime);
1746		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1747	}
1748	return (error);
1749}
1750
1751int
1752freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
1753{
1754	const void *msgp;
1755	long mtype;
1756	int32_t mtype32;
1757	int error;
1758
1759	msgp = PTRIN(uap->msgp);
1760	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
1761		return (error);
1762	mtype = mtype32;
1763	return (kern_msgsnd(td, uap->msqid,
1764	    (const char *)msgp + sizeof(mtype32),
1765	    uap->msgsz, uap->msgflg, mtype));
1766}
1767
1768int
1769freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
1770{
1771	void *msgp;
1772	long mtype;
1773	int32_t mtype32;
1774	int error;
1775
1776	msgp = PTRIN(uap->msgp);
1777	if ((error = kern_msgrcv(td, uap->msqid,
1778	    (char *)msgp + sizeof(mtype32), uap->msgsz,
1779	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1780		return (error);
1781	mtype32 = (int32_t)mtype;
1782	return (copyout(&mtype32, msgp, sizeof(mtype32)));
1783}
1784#endif
1785
1786#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1787    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1788
1789/* XXX casting to (sy_call_t *) is bogus, as usual. */
1790static sy_call_t *msgcalls[] = {
1791	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
1792	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
1793};
1794
1795/*
1796 * Entry point for all MSG calls.
1797 */
1798int
1799sys_msgsys(td, uap)
1800	struct thread *td;
1801	/* XXX actually varargs. */
1802	struct msgsys_args /* {
1803		int	which;
1804		int	a2;
1805		int	a3;
1806		int	a4;
1807		int	a5;
1808		int	a6;
1809	} */ *uap;
1810{
1811	int error;
1812
1813	if (uap->which < 0 || uap->which >= nitems(msgcalls))
1814		return (EINVAL);
1815	error = (*msgcalls[uap->which])(td, &uap->a2);
1816	return (error);
1817}
1818
/*
 * CP(src, dst, fld): copy the member fld from structure src to structure
 * dst.  Only defined here when no definition of CP is already in effect.
 */
#ifndef CP
#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
#endif
1822
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument structure consumed by freebsd7_msgctl().  Only declared here when
 * _SYS_SYSPROTO_H_ is not defined.
 */
struct freebsd7_msgctl_args {
	int	msqid;			/* message queue identifier */
	int	cmd;			/* command; IPC_SET and IPC_STAT use buf */
	struct	msqid_ds_old *buf;	/* user buffer in the old layout */
};
#endif
1830int
1831freebsd7_msgctl(td, uap)
1832	struct thread *td;
1833	struct freebsd7_msgctl_args *uap;
1834{
1835	struct msqid_ds_old msqold;
1836	struct msqid_ds msqbuf;
1837	int error;
1838
1839	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
1840	    uap->buf));
1841	if (uap->cmd == IPC_SET) {
1842		error = copyin(uap->buf, &msqold, sizeof(msqold));
1843		if (error)
1844			return (error);
1845		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
1846		CP(msqold, msqbuf, msg_first);
1847		CP(msqold, msqbuf, msg_last);
1848		CP(msqold, msqbuf, msg_cbytes);
1849		CP(msqold, msqbuf, msg_qnum);
1850		CP(msqold, msqbuf, msg_qbytes);
1851		CP(msqold, msqbuf, msg_lspid);
1852		CP(msqold, msqbuf, msg_lrpid);
1853		CP(msqold, msqbuf, msg_stime);
1854		CP(msqold, msqbuf, msg_rtime);
1855		CP(msqold, msqbuf, msg_ctime);
1856	}
1857	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1858	if (error)
1859		return (error);
1860	if (uap->cmd == IPC_STAT) {
1861		bzero(&msqold, sizeof(msqold));
1862		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
1863		CP(msqbuf, msqold, msg_first);
1864		CP(msqbuf, msqold, msg_last);
1865		CP(msqbuf, msqold, msg_cbytes);
1866		CP(msqbuf, msqold, msg_qnum);
1867		CP(msqbuf, msqold, msg_qbytes);
1868		CP(msqbuf, msqold, msg_lspid);
1869		CP(msqbuf, msqold, msg_lrpid);
1870		CP(msqbuf, msqold, msg_stime);
1871		CP(msqbuf, msqold, msg_rtime);
1872		CP(msqbuf, msqold, msg_ctime);
1873		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
1874	}
1875	return (error);
1876}
1877
1878#undef CP
1879
1880#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1881	   COMPAT_FREEBSD7 */
1882