1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: stable/10/sys/kern/sysv_msg.c 329741 2018-02-21 18:32:57Z brooks $");
52
53#include "opt_compat.h"
54#include "opt_sysvipc.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/priv.h>
61#include <sys/proc.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/mount.h>
66#include <sys/msg.h>
67#include <sys/racct.h>
68#include <sys/sx.h>
69#include <sys/syscall.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysent.h>
72#include <sys/sysctl.h>
73#include <sys/malloc.h>
74#include <sys/jail.h>
75
76#include <security/mac/mac_framework.h>
77
78FEATURE(sysv_msg, "System V message queues support");
79
80static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
81
82static int msginit(void);
83static int msgunload(void);
84static int sysvmsg_modload(struct module *, int, void *);
85static void msq_remove(struct msqid_kernel *);
86static struct prison *msg_find_prison(struct ucred *);
87static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
88static int msg_prison_check(void *, void *);
89static int msg_prison_set(void *, void *);
90static int msg_prison_get(void *, void *);
91static int msg_prison_remove(void *, void *);
92static void msg_prison_cleanup(struct prison *);
93
94
95#ifdef MSG_DEBUG
96#define DPRINTF(a)	printf a
97#else
98#define DPRINTF(a)	(void)0
99#endif
100
101static void msg_freehdr(struct msg *msghdr);
102
/*
 * Compile-time defaults for the message queue limits.  Each default (other
 * than MSGMAX, which is derived) can be overridden by defining the macro
 * before this point.  msginit() may further replace the values stored in
 * msginfo with the kern.ipc.msg* tunables.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 (msginit panics) */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* segment size times number of segments */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
119
120/*
121 * Based on the configuration parameters described in an SVR2 (yes, two)
122 * config(1m) man page.
123 *
124 * Each message is broken up and stored in segments that are msgssz bytes
125 * long.  For efficiency reasons, this should be a power of two.  Also,
126 * it doesn't make sense if it is less than 8 or greater than about 256.
127 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
128 * two between 8 and 1024 inclusive (and panic's if it isn't).
129 */
130struct msginfo msginfo = {
131                MSGMAX,         /* max chars in a message */
132                MSGMNI,         /* # of message queue identifiers */
133                MSGMNB,         /* max chars in a queue */
134                MSGTQL,         /* max messages in system */
135                MSGSSZ,         /* size of a message segment */
136                		/* (must be small power of 2 greater than 4) */
137                MSGSEG          /* number of message segments */
138};
139
/*
 * Implementation-specific packing of a message queue identifier: the low
 * 16 bits carry the index of the queue slot and the next 16 bits carry the
 * msg_perm.seq value of the msqid_ds occupying that slot.
 */
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
#define MSQID(ix,ds)	\
	((((ds).msg_perm.seq << 16) & 0xffff0000) | ((ix) & 0xffff))
147
148/*
149 * The rest of this file is specific to this particular implementation.
150 */
151
152struct msgmap {
153	short	next;		/* next segment in buffer */
154    				/* -1 -> available */
155    				/* 0..(MSGSEG-1) -> index of next segment */
156};
157
158#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
159
160static int nfree_msgmaps;	/* # of free map entries */
161static short free_msgmaps;	/* head of linked list of free map entries */
162static struct msg *free_msghdrs;/* list of free msg headers */
163static char *msgpool;		/* MSGMAX byte long msg buffer pool */
164static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
165static struct msg *msghdrs;	/* MSGTQL msg headers */
166static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
167static struct mtx msq_mtx;	/* global mutex for message queues. */
168static unsigned msg_prison_slot;/* prison OSD slot */
169
/*
 * Native system calls provided by this module.  The table is handed to
 * syscall_helper_register() in msginit() and to syscall_helper_unregister()
 * in msgunload().  msgsys and the FreeBSD 7 flavour of msgctl are only
 * included when one of the COMPAT_FREEBSD4..7 options is configured.
 */
static struct syscall_helper_data msg_syscalls[] = {
	SYSCALL_INIT_HELPER(msgctl),
	SYSCALL_INIT_HELPER(msgget),
	SYSCALL_INIT_HELPER(msgsnd),
	SYSCALL_INIT_HELPER(msgrcv),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL_INIT_HELPER(msgsys),
	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
#endif
	SYSCALL_INIT_LAST	/* table terminator */
};
182
#ifdef COMPAT_FREEBSD32
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_ipc.h>
#include <compat/freebsd32/freebsd32_proto.h>
#include <compat/freebsd32/freebsd32_signal.h>
#include <compat/freebsd32/freebsd32_syscall.h>
#include <compat/freebsd32/freebsd32_util.h>

/*
 * System calls provided to 32-bit processes when COMPAT_FREEBSD32 is
 * configured.  Registered with syscall32_helper_register() in msginit() and
 * removed with syscall32_helper_unregister() in msgunload().  msgget is
 * registered through the _COMPAT helper under its native name rather than a
 * freebsd32_ wrapper name.
 */
static struct syscall_helper_data msg32_syscalls[] = {
	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
	SYSCALL32_INIT_HELPER_COMPAT(msgget),
	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
#endif
	SYSCALL_INIT_LAST	/* table terminator */
};
#endif
204
205static int
206msginit()
207{
208	struct prison *pr;
209	void *rsv;
210	int i, error;
211	osd_method_t methods[PR_MAXMETHOD] = {
212	    [PR_METHOD_CHECK] =		msg_prison_check,
213	    [PR_METHOD_SET] =		msg_prison_set,
214	    [PR_METHOD_GET] =		msg_prison_get,
215	    [PR_METHOD_REMOVE] =	msg_prison_remove,
216	};
217
218	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
219	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
220	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
221	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
222	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
223	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
224
225	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
226	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
227	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
228	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
229	    M_WAITOK);
230
231	/*
232	 * msginfo.msgssz should be a power of two for efficiency reasons.
233	 * It is also pretty silly if msginfo.msgssz is less than 8
234	 * or greater than about 256 so ...
235	 */
236
237	i = 8;
238	while (i < 1024 && i != msginfo.msgssz)
239		i <<= 1;
240    	if (i != msginfo.msgssz) {
241		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
242		    msginfo.msgssz));
243		panic("msginfo.msgssz not a small power of 2");
244	}
245
246	if (msginfo.msgseg > 32767) {
247		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
248		panic("msginfo.msgseg > 32767");
249	}
250
251	for (i = 0; i < msginfo.msgseg; i++) {
252		if (i > 0)
253			msgmaps[i-1].next = i;
254		msgmaps[i].next = -1;	/* implies entry is available */
255	}
256	free_msgmaps = 0;
257	nfree_msgmaps = msginfo.msgseg;
258
259	for (i = 0; i < msginfo.msgtql; i++) {
260		msghdrs[i].msg_type = 0;
261		if (i > 0)
262			msghdrs[i-1].msg_next = &msghdrs[i];
263		msghdrs[i].msg_next = NULL;
264#ifdef MAC
265		mac_sysvmsg_init(&msghdrs[i]);
266#endif
267    	}
268	free_msghdrs = &msghdrs[0];
269
270	for (i = 0; i < msginfo.msgmni; i++) {
271		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
272		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
273		msqids[i].u.msg_perm.mode = 0;
274#ifdef MAC
275		mac_sysvmsq_init(&msqids[i]);
276#endif
277	}
278	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
279
280	/* Set current prisons according to their allow.sysvipc. */
281	msg_prison_slot = osd_jail_register(NULL, methods);
282	rsv = osd_reserve(msg_prison_slot);
283	prison_lock(&prison0);
284	(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
285	prison_unlock(&prison0);
286	rsv = NULL;
287	sx_slock(&allprison_lock);
288	TAILQ_FOREACH(pr, &allprison, pr_list) {
289		if (rsv == NULL)
290			rsv = osd_reserve(msg_prison_slot);
291		prison_lock(pr);
292		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
293			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
294			    &prison0);
295			rsv = NULL;
296		}
297		prison_unlock(pr);
298	}
299	if (rsv != NULL)
300		osd_free_reserved(rsv);
301	sx_sunlock(&allprison_lock);
302
303	error = syscall_helper_register(msg_syscalls);
304	if (error != 0)
305		return (error);
306#ifdef COMPAT_FREEBSD32
307	error = syscall32_helper_register(msg32_syscalls);
308	if (error != 0)
309		return (error);
310#endif
311	return (0);
312}
313
314static int
315msgunload()
316{
317	struct msqid_kernel *msqkptr;
318	int msqid;
319#ifdef MAC
320	int i;
321#endif
322
323	syscall_helper_unregister(msg_syscalls);
324#ifdef COMPAT_FREEBSD32
325	syscall32_helper_unregister(msg32_syscalls);
326#endif
327
328	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
329		msqkptr = &msqids[msqid];
330		if (msqkptr->u.msg_qbytes != 0 ||
331		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
332			break;
333	}
334	if (msqid != msginfo.msgmni)
335		return (EBUSY);
336
337	if (msg_prison_slot != 0)
338		osd_jail_deregister(msg_prison_slot);
339#ifdef MAC
340	for (i = 0; i < msginfo.msgtql; i++)
341		mac_sysvmsg_destroy(&msghdrs[i]);
342	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
343		mac_sysvmsq_destroy(&msqids[msqid]);
344#endif
345	free(msgpool, M_MSG);
346	free(msgmaps, M_MSG);
347	free(msghdrs, M_MSG);
348	free(msqids, M_MSG);
349	mtx_destroy(&msq_mtx);
350	return (0);
351}
352
353
354static int
355sysvmsg_modload(struct module *module, int cmd, void *arg)
356{
357	int error = 0;
358
359	switch (cmd) {
360	case MOD_LOAD:
361		error = msginit();
362		if (error != 0)
363			msgunload();
364		break;
365	case MOD_UNLOAD:
366		error = msgunload();
367		break;
368	case MOD_SHUTDOWN:
369		break;
370	default:
371		error = EINVAL;
372		break;
373	}
374	return (error);
375}
376
377static moduledata_t sysvmsg_mod = {
378	"sysvmsg",
379	&sysvmsg_modload,
380	NULL
381};
382
383DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
384MODULE_VERSION(sysvmsg, 1);
385
386static void
387msg_freehdr(msghdr)
388	struct msg *msghdr;
389{
390	while (msghdr->msg_ts > 0) {
391		short next;
392		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
393			panic("msghdr->msg_spot out of range");
394		next = msgmaps[msghdr->msg_spot].next;
395		msgmaps[msghdr->msg_spot].next = free_msgmaps;
396		free_msgmaps = msghdr->msg_spot;
397		nfree_msgmaps++;
398		msghdr->msg_spot = next;
399		if (msghdr->msg_ts >= msginfo.msgssz)
400			msghdr->msg_ts -= msginfo.msgssz;
401		else
402			msghdr->msg_ts = 0;
403	}
404	if (msghdr->msg_spot != -1)
405		panic("msghdr->msg_spot != -1");
406	msghdr->msg_next = free_msghdrs;
407	free_msghdrs = msghdr;
408#ifdef MAC
409	mac_sysvmsg_cleanup(msghdr);
410#endif
411}
412
413static void
414msq_remove(struct msqid_kernel *msqkptr)
415{
416	struct msg *msghdr;
417
418	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
419	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
420	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
421	crfree(msqkptr->cred);
422	msqkptr->cred = NULL;
423
424	/* Free the message headers */
425	msghdr = msqkptr->u.msg_first;
426	while (msghdr != NULL) {
427		struct msg *msghdr_tmp;
428
429		/* Free the segments of each message */
430		msqkptr->u.msg_cbytes -= msghdr->msg_ts;
431		msqkptr->u.msg_qnum--;
432		msghdr_tmp = msghdr;
433		msghdr = msghdr->msg_next;
434		msg_freehdr(msghdr_tmp);
435	}
436
437	if (msqkptr->u.msg_cbytes != 0)
438		panic("msg_cbytes is screwed up");
439	if (msqkptr->u.msg_qnum != 0)
440		panic("msg_qnum is screwed up");
441
442	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
443
444#ifdef MAC
445	mac_sysvmsq_cleanup(msqkptr);
446#endif
447
448	wakeup(msqkptr);
449}
450
451static struct prison *
452msg_find_prison(struct ucred *cred)
453{
454	struct prison *pr, *rpr;
455
456	pr = cred->cr_prison;
457	prison_lock(pr);
458	rpr = osd_jail_get(pr, msg_prison_slot);
459	prison_unlock(pr);
460	return rpr;
461}
462
463static int
464msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
465{
466
467	if (msqkptr->cred == NULL ||
468	    !(rpr == msqkptr->cred->cr_prison ||
469	      prison_ischild(rpr, msqkptr->cred->cr_prison)))
470		return (EINVAL);
471	return (0);
472}
473
474#ifndef _SYS_SYSPROTO_H_
475struct msgctl_args {
476	int	msqid;
477	int	cmd;
478	struct	msqid_ds *buf;
479};
480#endif
481int
482sys_msgctl(td, uap)
483	struct thread *td;
484	register struct msgctl_args *uap;
485{
486	int msqid = uap->msqid;
487	int cmd = uap->cmd;
488	struct msqid_ds msqbuf;
489	int error;
490
491	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
492	if (cmd == IPC_SET &&
493	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
494		return (error);
495	error = kern_msgctl(td, msqid, cmd, &msqbuf);
496	if (cmd == IPC_STAT && error == 0)
497		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
498	return (error);
499}
500
501int
502kern_msgctl(td, msqid, cmd, msqbuf)
503	struct thread *td;
504	int msqid;
505	int cmd;
506	struct msqid_ds *msqbuf;
507{
508	int rval, error, msqix;
509	register struct msqid_kernel *msqkptr;
510	struct prison *rpr;
511
512	rpr = msg_find_prison(td->td_ucred);
513	if (rpr == NULL)
514		return (ENOSYS);
515
516	msqix = IPCID_TO_IX(msqid);
517
518	if (msqix < 0 || msqix >= msginfo.msgmni) {
519		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
520		    msginfo.msgmni));
521		return (EINVAL);
522	}
523
524	msqkptr = &msqids[msqix];
525
526	mtx_lock(&msq_mtx);
527	if (msqkptr->u.msg_qbytes == 0) {
528		DPRINTF(("no such msqid\n"));
529		error = EINVAL;
530		goto done2;
531	}
532	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
533		DPRINTF(("wrong sequence number\n"));
534		error = EINVAL;
535		goto done2;
536	}
537
538	error = msq_prison_cansee(rpr, msqkptr);
539	if (error != 0) {
540		DPRINTF(("requester can't see prison\n"));
541		goto done2;
542	}
543
544#ifdef MAC
545	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
546	if (error != 0)
547		goto done2;
548#endif
549
550	error = 0;
551	rval = 0;
552
553	switch (cmd) {
554
555	case IPC_RMID:
556	{
557#ifdef MAC
558		struct msg *msghdr;
559#endif
560		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
561			goto done2;
562
563#ifdef MAC
564		/*
565		 * Check that the thread has MAC access permissions to
566		 * individual msghdrs.  Note: We need to do this in a
567		 * separate loop because the actual loop alters the
568		 * msq/msghdr info as it progresses, and there is no going
569		 * back if half the way through we discover that the
570		 * thread cannot free a certain msghdr.  The msq will get
571		 * into an inconsistent state.
572		 */
573		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
574		    msghdr = msghdr->msg_next) {
575			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
576			if (error != 0)
577				goto done2;
578		}
579#endif
580
581		msq_remove(msqkptr);
582	}
583
584		break;
585
586	case IPC_SET:
587		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
588			goto done2;
589		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
590			error = priv_check(td, PRIV_IPC_MSGSIZE);
591			if (error)
592				goto done2;
593		}
594		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
595			DPRINTF(("can't increase msg_qbytes beyond %d"
596			    "(truncating)\n", msginfo.msgmnb));
597			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
598		}
599		if (msqbuf->msg_qbytes == 0) {
600			DPRINTF(("can't reduce msg_qbytes to 0\n"));
601			error = EINVAL;		/* non-standard errno! */
602			goto done2;
603		}
604		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
605		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
606		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
607		    (msqbuf->msg_perm.mode & 0777);
608		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
609		msqkptr->u.msg_ctime = time_second;
610		break;
611
612	case IPC_STAT:
613		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
614			DPRINTF(("requester doesn't have read access\n"));
615			goto done2;
616		}
617		*msqbuf = msqkptr->u;
618		if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
619			msqbuf->msg_perm.key = IPC_PRIVATE;
620		break;
621
622	default:
623		DPRINTF(("invalid command %d\n", cmd));
624		error = EINVAL;
625		goto done2;
626	}
627
628	if (error == 0)
629		td->td_retval[0] = rval;
630done2:
631	mtx_unlock(&msq_mtx);
632	return (error);
633}
634
635#ifndef _SYS_SYSPROTO_H_
636struct msgget_args {
637	key_t	key;
638	int	msgflg;
639};
640#endif
641
642int
643sys_msgget(td, uap)
644	struct thread *td;
645	register struct msgget_args *uap;
646{
647	int msqid, error = 0;
648	int key = uap->key;
649	int msgflg = uap->msgflg;
650	struct ucred *cred = td->td_ucred;
651	register struct msqid_kernel *msqkptr = NULL;
652
653	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
654
655	if (msg_find_prison(cred) == NULL)
656		return (ENOSYS);
657
658	mtx_lock(&msq_mtx);
659	if (key != IPC_PRIVATE) {
660		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
661			msqkptr = &msqids[msqid];
662			if (msqkptr->u.msg_qbytes != 0 &&
663			    msqkptr->cred != NULL &&
664			    msqkptr->cred->cr_prison == cred->cr_prison &&
665			    msqkptr->u.msg_perm.key == key)
666				break;
667		}
668		if (msqid < msginfo.msgmni) {
669			DPRINTF(("found public key\n"));
670			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
671				DPRINTF(("not exclusive\n"));
672				error = EEXIST;
673				goto done2;
674			}
675			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
676			    msgflg & 0700))) {
677				DPRINTF(("requester doesn't have 0%o access\n",
678				    msgflg & 0700));
679				goto done2;
680			}
681#ifdef MAC
682			error = mac_sysvmsq_check_msqget(cred, msqkptr);
683			if (error != 0)
684				goto done2;
685#endif
686			goto found;
687		}
688	}
689
690	DPRINTF(("need to allocate the msqid_ds\n"));
691	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
692		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
693			/*
694			 * Look for an unallocated and unlocked msqid_ds.
695			 * msqid_ds's can be locked by msgsnd or msgrcv while
696			 * they are copying the message in/out.  We can't
697			 * re-use the entry until they release it.
698			 */
699			msqkptr = &msqids[msqid];
700			if (msqkptr->u.msg_qbytes == 0 &&
701			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
702				break;
703		}
704		if (msqid == msginfo.msgmni) {
705			DPRINTF(("no more msqid_ds's available\n"));
706			error = ENOSPC;
707			goto done2;
708		}
709#ifdef RACCT
710		if (racct_enable) {
711			PROC_LOCK(td->td_proc);
712			error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
713			PROC_UNLOCK(td->td_proc);
714			if (error != 0) {
715				error = ENOSPC;
716				goto done2;
717			}
718		}
719#endif
720		DPRINTF(("msqid %d is available\n", msqid));
721		msqkptr->u.msg_perm.key = key;
722		msqkptr->u.msg_perm.cuid = cred->cr_uid;
723		msqkptr->u.msg_perm.uid = cred->cr_uid;
724		msqkptr->u.msg_perm.cgid = cred->cr_gid;
725		msqkptr->u.msg_perm.gid = cred->cr_gid;
726		msqkptr->u.msg_perm.mode = (msgflg & 0777);
727		msqkptr->cred = crhold(cred);
728		/* Make sure that the returned msqid is unique */
729		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
730		msqkptr->u.msg_first = NULL;
731		msqkptr->u.msg_last = NULL;
732		msqkptr->u.msg_cbytes = 0;
733		msqkptr->u.msg_qnum = 0;
734		msqkptr->u.msg_qbytes = msginfo.msgmnb;
735		msqkptr->u.msg_lspid = 0;
736		msqkptr->u.msg_lrpid = 0;
737		msqkptr->u.msg_stime = 0;
738		msqkptr->u.msg_rtime = 0;
739		msqkptr->u.msg_ctime = time_second;
740#ifdef MAC
741		mac_sysvmsq_create(cred, msqkptr);
742#endif
743	} else {
744		DPRINTF(("didn't find it and wasn't asked to create it\n"));
745		error = ENOENT;
746		goto done2;
747	}
748
749found:
750	/* Construct the unique msqid */
751	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
752done2:
753	mtx_unlock(&msq_mtx);
754	return (error);
755}
756
757#ifndef _SYS_SYSPROTO_H_
758struct msgsnd_args {
759	int	msqid;
760	const void	*msgp;
761	size_t	msgsz;
762	int	msgflg;
763};
764#endif
765int
766kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
767	struct thread *td;
768	int msqid;
769	const void *msgp;	/* XXX msgp is actually mtext. */
770	size_t msgsz;
771	int msgflg;
772	long mtype;
773{
774	int msqix, segs_needed, error = 0;
775	register struct msqid_kernel *msqkptr;
776	register struct msg *msghdr;
777	struct prison *rpr;
778	short next;
779#ifdef RACCT
780	size_t saved_msgsz;
781#endif
782
783	rpr = msg_find_prison(td->td_ucred);
784	if (rpr == NULL)
785		return (ENOSYS);
786
787	mtx_lock(&msq_mtx);
788	msqix = IPCID_TO_IX(msqid);
789
790	if (msqix < 0 || msqix >= msginfo.msgmni) {
791		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
792		    msginfo.msgmni));
793		error = EINVAL;
794		goto done2;
795	}
796
797	msqkptr = &msqids[msqix];
798	if (msqkptr->u.msg_qbytes == 0) {
799		DPRINTF(("no such message queue id\n"));
800		error = EINVAL;
801		goto done2;
802	}
803	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
804		DPRINTF(("wrong sequence number\n"));
805		error = EINVAL;
806		goto done2;
807	}
808
809	if ((error = msq_prison_cansee(rpr, msqkptr))) {
810		DPRINTF(("requester can't see prison\n"));
811		goto done2;
812	}
813
814	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
815		DPRINTF(("requester doesn't have write access\n"));
816		goto done2;
817	}
818
819#ifdef MAC
820	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
821	if (error != 0)
822		goto done2;
823#endif
824
825#ifdef RACCT
826	if (racct_enable) {
827		PROC_LOCK(td->td_proc);
828		if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
829			PROC_UNLOCK(td->td_proc);
830			error = EAGAIN;
831			goto done2;
832		}
833		saved_msgsz = msgsz;
834		if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
835			racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
836			PROC_UNLOCK(td->td_proc);
837			error = EAGAIN;
838			goto done2;
839		}
840		PROC_UNLOCK(td->td_proc);
841	}
842#endif
843
844	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
845	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
846	    msginfo.msgssz, segs_needed));
847	for (;;) {
848		int need_more_resources = 0;
849
850		/*
851		 * check msgsz
852		 * (inside this loop in case msg_qbytes changes while we sleep)
853		 */
854
855		if (msgsz > msqkptr->u.msg_qbytes) {
856			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
857			error = EINVAL;
858			goto done3;
859		}
860
861		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
862			DPRINTF(("msqid is locked\n"));
863			need_more_resources = 1;
864		}
865		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
866			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
867			need_more_resources = 1;
868		}
869		if (segs_needed > nfree_msgmaps) {
870			DPRINTF(("segs_needed > nfree_msgmaps\n"));
871			need_more_resources = 1;
872		}
873		if (free_msghdrs == NULL) {
874			DPRINTF(("no more msghdrs\n"));
875			need_more_resources = 1;
876		}
877
878		if (need_more_resources) {
879			int we_own_it;
880
881			if ((msgflg & IPC_NOWAIT) != 0) {
882				DPRINTF(("need more resources but caller "
883				    "doesn't want to wait\n"));
884				error = EAGAIN;
885				goto done3;
886			}
887
888			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
889				DPRINTF(("we don't own the msqid_ds\n"));
890				we_own_it = 0;
891			} else {
892				/* Force later arrivals to wait for our
893				   request */
894				DPRINTF(("we own the msqid_ds\n"));
895				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
896				we_own_it = 1;
897			}
898			DPRINTF(("msgsnd:  goodnight\n"));
899			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
900			    "msgsnd", hz);
901			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
902			if (we_own_it)
903				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
904			if (error == EWOULDBLOCK) {
905				DPRINTF(("msgsnd:  timed out\n"));
906				continue;
907			}
908			if (error != 0) {
909				DPRINTF(("msgsnd:  interrupted system call\n"));
910				error = EINTR;
911				goto done3;
912			}
913
914			/*
915			 * Make sure that the msq queue still exists
916			 */
917
918			if (msqkptr->u.msg_qbytes == 0) {
919				DPRINTF(("msqid deleted\n"));
920				error = EIDRM;
921				goto done3;
922			}
923
924		} else {
925			DPRINTF(("got all the resources that we need\n"));
926			break;
927		}
928	}
929
930	/*
931	 * We have the resources that we need.
932	 * Make sure!
933	 */
934
935	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
936		panic("msg_perm.mode & MSG_LOCKED");
937	if (segs_needed > nfree_msgmaps)
938		panic("segs_needed > nfree_msgmaps");
939	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
940		panic("msgsz + msg_cbytes > msg_qbytes");
941	if (free_msghdrs == NULL)
942		panic("no more msghdrs");
943
944	/*
945	 * Re-lock the msqid_ds in case we page-fault when copying in the
946	 * message
947	 */
948
949	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
950		panic("msqid_ds is already locked");
951	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
952
953	/*
954	 * Allocate a message header
955	 */
956
957	msghdr = free_msghdrs;
958	free_msghdrs = msghdr->msg_next;
959	msghdr->msg_spot = -1;
960	msghdr->msg_ts = msgsz;
961	msghdr->msg_type = mtype;
962#ifdef MAC
963	/*
964	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
965	 * immediately?  Or, should it be checked just before the msg is
966	 * enqueued in the msgq (as it is done now)?
967	 */
968	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
969#endif
970
971	/*
972	 * Allocate space for the message
973	 */
974
975	while (segs_needed > 0) {
976		if (nfree_msgmaps <= 0)
977			panic("not enough msgmaps");
978		if (free_msgmaps == -1)
979			panic("nil free_msgmaps");
980		next = free_msgmaps;
981		if (next <= -1)
982			panic("next too low #1");
983		if (next >= msginfo.msgseg)
984			panic("next out of range #1");
985		DPRINTF(("allocating segment %d to message\n", next));
986		free_msgmaps = msgmaps[next].next;
987		nfree_msgmaps--;
988		msgmaps[next].next = msghdr->msg_spot;
989		msghdr->msg_spot = next;
990		segs_needed--;
991	}
992
993	/*
994	 * Validate the message type
995	 */
996
997	if (msghdr->msg_type < 1) {
998		msg_freehdr(msghdr);
999		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1000		wakeup(msqkptr);
1001		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
1002		error = EINVAL;
1003		goto done3;
1004	}
1005
1006	/*
1007	 * Copy in the message body
1008	 */
1009
1010	next = msghdr->msg_spot;
1011	while (msgsz > 0) {
1012		size_t tlen;
1013		if (msgsz > msginfo.msgssz)
1014			tlen = msginfo.msgssz;
1015		else
1016			tlen = msgsz;
1017		if (next <= -1)
1018			panic("next too low #2");
1019		if (next >= msginfo.msgseg)
1020			panic("next out of range #2");
1021		mtx_unlock(&msq_mtx);
1022		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
1023		    tlen)) != 0) {
1024			mtx_lock(&msq_mtx);
1025			DPRINTF(("error %d copying in message segment\n",
1026			    error));
1027			msg_freehdr(msghdr);
1028			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1029			wakeup(msqkptr);
1030			goto done3;
1031		}
1032		mtx_lock(&msq_mtx);
1033		msgsz -= tlen;
1034		msgp = (const char *)msgp + tlen;
1035		next = msgmaps[next].next;
1036	}
1037	if (next != -1)
1038		panic("didn't use all the msg segments");
1039
1040	/*
1041	 * We've got the message.  Unlock the msqid_ds.
1042	 */
1043
1044	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1045
1046	/*
1047	 * Make sure that the msqid_ds is still allocated.
1048	 */
1049
1050	if (msqkptr->u.msg_qbytes == 0) {
1051		msg_freehdr(msghdr);
1052		wakeup(msqkptr);
1053		error = EIDRM;
1054		goto done3;
1055	}
1056
1057#ifdef MAC
1058	/*
1059	 * Note: Since the task/thread allocates the msghdr and usually
1060	 * primes it with its own MAC label, for a majority of policies, it
1061	 * won't be necessary to check whether the msghdr has access
1062	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
1063	 * suffice in that case.  However, this hook may be required where
1064	 * individual policies derive a non-identical label for the msghdr
1065	 * from the current thread label and may want to check the msghdr
1066	 * enqueue permissions, along with read/write permissions to the
1067	 * msgq.
1068	 */
1069	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
1070	if (error != 0) {
1071		msg_freehdr(msghdr);
1072		wakeup(msqkptr);
1073		goto done3;
1074	}
1075#endif
1076
1077	/*
1078	 * Put the message into the queue
1079	 */
1080	if (msqkptr->u.msg_first == NULL) {
1081		msqkptr->u.msg_first = msghdr;
1082		msqkptr->u.msg_last = msghdr;
1083	} else {
1084		msqkptr->u.msg_last->msg_next = msghdr;
1085		msqkptr->u.msg_last = msghdr;
1086	}
1087	msqkptr->u.msg_last->msg_next = NULL;
1088
1089	msqkptr->u.msg_cbytes += msghdr->msg_ts;
1090	msqkptr->u.msg_qnum++;
1091	msqkptr->u.msg_lspid = td->td_proc->p_pid;
1092	msqkptr->u.msg_stime = time_second;
1093
1094	wakeup(msqkptr);
1095	td->td_retval[0] = 0;
1096done3:
1097#ifdef RACCT
1098	if (racct_enable && error != 0) {
1099		PROC_LOCK(td->td_proc);
1100		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
1101		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
1102		PROC_UNLOCK(td->td_proc);
1103	}
1104#endif
1105done2:
1106	mtx_unlock(&msq_mtx);
1107	return (error);
1108}
1109
1110int
1111sys_msgsnd(td, uap)
1112	struct thread *td;
1113	register struct msgsnd_args *uap;
1114{
1115	int error;
1116	long mtype;
1117
1118	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
1119	    uap->msgsz, uap->msgflg));
1120
1121	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
1122		DPRINTF(("error %d copying the message type\n", error));
1123		return (error);
1124	}
1125	return (kern_msgsnd(td, uap->msqid,
1126	    (const char *)uap->msgp + sizeof(mtype),
1127	    uap->msgsz, uap->msgflg, mtype));
1128}
1129
#ifndef _SYS_SYSPROTO_H_
/*
 * Arguments of the msgrcv system call as read by sys_msgrcv().  Only
 * compiled when _SYS_SYSPROTO_H_ is not defined (presumably because the
 * sysproto header normally supplies this declaration -- TODO confirm).
 */
struct msgrcv_args {
	int	msqid;		/* message queue identifier */
	void	*msgp;		/* user buffer: long type, then the text */
	size_t	msgsz;		/* capacity of the text part in bytes */
	long	msgtyp;		/* message type selector */
	int	msgflg;		/* flags (IPC_NOWAIT, MSG_NOERROR) */
};
#endif
1139int
1140kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
1141	struct thread *td;
1142	int msqid;
1143	void *msgp;	/* XXX msgp is actually mtext. */
1144	size_t msgsz;
1145	long msgtyp;
1146	int msgflg;
1147	long *mtype;
1148{
1149	size_t len;
1150	register struct msqid_kernel *msqkptr;
1151	register struct msg *msghdr;
1152	struct prison *rpr;
1153	int msqix, error = 0;
1154	short next;
1155
1156	rpr = msg_find_prison(td->td_ucred);
1157	if (rpr == NULL)
1158		return (ENOSYS);
1159
1160	msqix = IPCID_TO_IX(msqid);
1161
1162	if (msqix < 0 || msqix >= msginfo.msgmni) {
1163		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1164		    msginfo.msgmni));
1165		return (EINVAL);
1166	}
1167
1168	msqkptr = &msqids[msqix];
1169	mtx_lock(&msq_mtx);
1170	if (msqkptr->u.msg_qbytes == 0) {
1171		DPRINTF(("no such message queue id\n"));
1172		error = EINVAL;
1173		goto done2;
1174	}
1175	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1176		DPRINTF(("wrong sequence number\n"));
1177		error = EINVAL;
1178		goto done2;
1179	}
1180
1181	if ((error = msq_prison_cansee(rpr, msqkptr))) {
1182		DPRINTF(("requester can't see prison\n"));
1183		goto done2;
1184	}
1185
1186	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1187		DPRINTF(("requester doesn't have read access\n"));
1188		goto done2;
1189	}
1190
1191#ifdef MAC
1192	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
1193	if (error != 0)
1194		goto done2;
1195#endif
1196
1197	msghdr = NULL;
1198	while (msghdr == NULL) {
1199		if (msgtyp == 0) {
1200			msghdr = msqkptr->u.msg_first;
1201			if (msghdr != NULL) {
1202				if (msgsz < msghdr->msg_ts &&
1203				    (msgflg & MSG_NOERROR) == 0) {
1204					DPRINTF(("first message on the queue "
1205					    "is too big (want %zu, got %d)\n",
1206					    msgsz, msghdr->msg_ts));
1207					error = E2BIG;
1208					goto done2;
1209				}
1210#ifdef MAC
1211				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
1212				    msghdr);
1213				if (error != 0)
1214					goto done2;
1215#endif
1216				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1217					msqkptr->u.msg_first = NULL;
1218					msqkptr->u.msg_last = NULL;
1219				} else {
1220					msqkptr->u.msg_first = msghdr->msg_next;
1221					if (msqkptr->u.msg_first == NULL)
1222						panic("msg_first/last screwed up #1");
1223				}
1224			}
1225		} else {
1226			struct msg *previous;
1227			struct msg **prev;
1228
1229			previous = NULL;
1230			prev = &(msqkptr->u.msg_first);
1231			while ((msghdr = *prev) != NULL) {
1232				/*
1233				 * Is this message's type an exact match or is
1234				 * this message's type less than or equal to
1235				 * the absolute value of a negative msgtyp?
1236				 * Note that the second half of this test can
1237				 * NEVER be true if msgtyp is positive since
1238				 * msg_type is always positive!
1239				 */
1240
1241				if (msgtyp == msghdr->msg_type ||
1242				    msghdr->msg_type <= -msgtyp) {
1243					DPRINTF(("found message type %ld, "
1244					    "requested %ld\n",
1245					    msghdr->msg_type, msgtyp));
1246					if (msgsz < msghdr->msg_ts &&
1247					    (msgflg & MSG_NOERROR) == 0) {
1248						DPRINTF(("requested message "
1249						    "on the queue is too big "
1250						    "(want %zu, got %hu)\n",
1251						    msgsz, msghdr->msg_ts));
1252						error = E2BIG;
1253						goto done2;
1254					}
1255#ifdef MAC
1256					error = mac_sysvmsq_check_msgrcv(
1257					    td->td_ucred, msghdr);
1258					if (error != 0)
1259						goto done2;
1260#endif
1261					*prev = msghdr->msg_next;
1262					if (msghdr == msqkptr->u.msg_last) {
1263						if (previous == NULL) {
1264							if (prev !=
1265							    &msqkptr->u.msg_first)
1266								panic("msg_first/last screwed up #2");
1267							msqkptr->u.msg_first =
1268							    NULL;
1269							msqkptr->u.msg_last =
1270							    NULL;
1271						} else {
1272							if (prev ==
1273							    &msqkptr->u.msg_first)
1274								panic("msg_first/last screwed up #3");
1275							msqkptr->u.msg_last =
1276							    previous;
1277						}
1278					}
1279					break;
1280				}
1281				previous = msghdr;
1282				prev = &(msghdr->msg_next);
1283			}
1284		}
1285
1286		/*
1287		 * We've either extracted the msghdr for the appropriate
1288		 * message or there isn't one.
1289		 * If there is one then bail out of this loop.
1290		 */
1291
1292		if (msghdr != NULL)
1293			break;
1294
1295		/*
1296		 * Hmph!  No message found.  Does the user want to wait?
1297		 */
1298
1299		if ((msgflg & IPC_NOWAIT) != 0) {
1300			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1301			    msgtyp));
1302			/* The SVID says to return ENOMSG. */
1303			error = ENOMSG;
1304			goto done2;
1305		}
1306
1307		/*
1308		 * Wait for something to happen
1309		 */
1310
1311		DPRINTF(("msgrcv:  goodnight\n"));
1312		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1313		    "msgrcv", 0);
1314		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1315
1316		if (error != 0) {
1317			DPRINTF(("msgrcv:  interrupted system call\n"));
1318			error = EINTR;
1319			goto done2;
1320		}
1321
1322		/*
1323		 * Make sure that the msq queue still exists
1324		 */
1325
1326		if (msqkptr->u.msg_qbytes == 0 ||
1327		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1328			DPRINTF(("msqid deleted\n"));
1329			error = EIDRM;
1330			goto done2;
1331		}
1332	}
1333
1334	/*
1335	 * Return the message to the user.
1336	 *
1337	 * First, do the bookkeeping (before we risk being interrupted).
1338	 */
1339
1340	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1341	msqkptr->u.msg_qnum--;
1342	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1343	msqkptr->u.msg_rtime = time_second;
1344
1345	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
1346	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);
1347
1348	/*
1349	 * Make msgsz the actual amount that we'll be returning.
1350	 * Note that this effectively truncates the message if it is too long
1351	 * (since msgsz is never increased).
1352	 */
1353
1354	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1355	    msghdr->msg_ts));
1356	if (msgsz > msghdr->msg_ts)
1357		msgsz = msghdr->msg_ts;
1358	*mtype = msghdr->msg_type;
1359
1360	/*
1361	 * Return the segments to the user
1362	 */
1363
1364	next = msghdr->msg_spot;
1365	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1366		size_t tlen;
1367
1368		if (msgsz - len > msginfo.msgssz)
1369			tlen = msginfo.msgssz;
1370		else
1371			tlen = msgsz - len;
1372		if (next <= -1)
1373			panic("next too low #3");
1374		if (next >= msginfo.msgseg)
1375			panic("next out of range #3");
1376		mtx_unlock(&msq_mtx);
1377		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1378		mtx_lock(&msq_mtx);
1379		if (error != 0) {
1380			DPRINTF(("error (%d) copying out message segment\n",
1381			    error));
1382			msg_freehdr(msghdr);
1383			wakeup(msqkptr);
1384			goto done2;
1385		}
1386		msgp = (char *)msgp + tlen;
1387		next = msgmaps[next].next;
1388	}
1389
1390	/*
1391	 * Done, return the actual number of bytes copied out.
1392	 */
1393
1394	msg_freehdr(msghdr);
1395	wakeup(msqkptr);
1396	td->td_retval[0] = msgsz;
1397done2:
1398	mtx_unlock(&msq_mtx);
1399	return (error);
1400}
1401
1402int
1403sys_msgrcv(td, uap)
1404	struct thread *td;
1405	register struct msgrcv_args *uap;
1406{
1407	int error;
1408	long mtype;
1409
1410	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1411	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1412
1413	if ((error = kern_msgrcv(td, uap->msqid,
1414	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1415	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1416		return (error);
1417	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1418		DPRINTF(("error %d copying the message type\n", error));
1419	return (error);
1420}
1421
1422static int
1423sysctl_msqids(SYSCTL_HANDLER_ARGS)
1424{
1425	struct msqid_kernel tmsqk;
1426	struct prison *pr, *rpr;
1427	int error, i;
1428
1429	pr = req->td->td_ucred->cr_prison;
1430	rpr = msg_find_prison(req->td->td_ucred);
1431	error = 0;
1432	for (i = 0; i < msginfo.msgmni; i++) {
1433		mtx_lock(&msq_mtx);
1434		if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
1435		    msq_prison_cansee(rpr, &msqids[i]) != 0)
1436			bzero(&tmsqk, sizeof(tmsqk));
1437		else {
1438			tmsqk = msqids[i];
1439			if (tmsqk.cred->cr_prison != pr)
1440				tmsqk.u.msg_perm.key = IPC_PRIVATE;
1441		}
1442		mtx_unlock(&msq_mtx);
1443		error = SYSCTL_OUT(req, &tmsqk, sizeof(tmsqk));
1444		if (error != 0)
1445			break;
1446	}
1447	return (error);
1448}
1449
1450SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1451    "Maximum message size");
1452SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1453    "Number of message queue identifiers");
1454SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1455    "Maximum number of bytes in a queue");
1456SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1457    "Maximum number of messages in the system");
1458SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1459    "Size of a message segment");
1460SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1461    "Number of message segments");
1462SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
1463    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
1464    NULL, 0, sysctl_msqids, "",
1465    "Array of struct msqid_kernel for each potential message queue");
1466
1467static int
1468msg_prison_check(void *obj, void *data)
1469{
1470	struct prison *pr = obj;
1471	struct prison *prpr;
1472	struct vfsoptlist *opts = data;
1473	int error, jsys;
1474
1475	/*
1476	 * sysvmsg is a jailsys integer.
1477	 * It must be "disable" if the parent jail is disabled.
1478	 */
1479	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1480	if (error != ENOENT) {
1481		if (error != 0)
1482			return (error);
1483		switch (jsys) {
1484		case JAIL_SYS_DISABLE:
1485			break;
1486		case JAIL_SYS_NEW:
1487		case JAIL_SYS_INHERIT:
1488			prison_lock(pr->pr_parent);
1489			prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
1490			prison_unlock(pr->pr_parent);
1491			if (prpr == NULL)
1492				return (EPERM);
1493			break;
1494		default:
1495			return (EINVAL);
1496		}
1497	}
1498
1499	return (0);
1500}
1501
1502static int
1503msg_prison_set(void *obj, void *data)
1504{
1505	struct prison *pr = obj;
1506	struct prison *tpr, *orpr, *nrpr, *trpr;
1507	struct vfsoptlist *opts = data;
1508	void *rsv;
1509	int jsys, descend;
1510
1511	/*
1512	 * sysvmsg controls which jail is the root of the associated msgs (this
1513	 * jail or same as the parent), or if the feature is available at all.
1514	 */
1515	if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
1516		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
1517		    ? JAIL_SYS_INHERIT
1518		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
1519		    ? JAIL_SYS_DISABLE
1520		    : -1;
1521	if (jsys == JAIL_SYS_DISABLE) {
1522		prison_lock(pr);
1523		orpr = osd_jail_get(pr, msg_prison_slot);
1524		if (orpr != NULL)
1525			osd_jail_del(pr, msg_prison_slot);
1526		prison_unlock(pr);
1527		if (orpr != NULL) {
1528			if (orpr == pr)
1529				msg_prison_cleanup(pr);
1530			/* Disable all child jails as well. */
1531			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
1532				prison_lock(tpr);
1533				trpr = osd_jail_get(tpr, msg_prison_slot);
1534				if (trpr != NULL) {
1535					osd_jail_del(tpr, msg_prison_slot);
1536					prison_unlock(tpr);
1537					if (trpr == tpr)
1538						msg_prison_cleanup(tpr);
1539				} else {
1540					prison_unlock(tpr);
1541					descend = 0;
1542				}
1543			}
1544		}
1545	} else if (jsys != -1) {
1546		if (jsys == JAIL_SYS_NEW)
1547			nrpr = pr;
1548		else {
1549			prison_lock(pr->pr_parent);
1550			nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
1551			prison_unlock(pr->pr_parent);
1552		}
1553		rsv = osd_reserve(msg_prison_slot);
1554		prison_lock(pr);
1555		orpr = osd_jail_get(pr, msg_prison_slot);
1556		if (orpr != nrpr)
1557			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
1558			    nrpr);
1559		else
1560			osd_free_reserved(rsv);
1561		prison_unlock(pr);
1562		if (orpr != nrpr) {
1563			if (orpr == pr)
1564				msg_prison_cleanup(pr);
1565			if (orpr != NULL) {
1566				/* Change child jails matching the old root, */
1567				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
1568					prison_lock(tpr);
1569					trpr = osd_jail_get(tpr,
1570					    msg_prison_slot);
1571					if (trpr == orpr) {
1572						(void)osd_jail_set(tpr,
1573						    msg_prison_slot, nrpr);
1574						prison_unlock(tpr);
1575						if (trpr == tpr)
1576							msg_prison_cleanup(tpr);
1577					} else {
1578						prison_unlock(tpr);
1579						descend = 0;
1580					}
1581				}
1582			}
1583		}
1584	}
1585
1586	return (0);
1587}
1588
1589static int
1590msg_prison_get(void *obj, void *data)
1591{
1592	struct prison *pr = obj;
1593	struct prison *rpr;
1594	struct vfsoptlist *opts = data;
1595	int error, jsys;
1596
1597	/* Set sysvmsg based on the jail's root prison. */
1598	prison_lock(pr);
1599	rpr = osd_jail_get(pr, msg_prison_slot);
1600	prison_unlock(pr);
1601	jsys = rpr == NULL ? JAIL_SYS_DISABLE
1602	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
1603	error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1604	if (error == ENOENT)
1605		error = 0;
1606	return (error);
1607}
1608
1609static int
1610msg_prison_remove(void *obj, void *data __unused)
1611{
1612	struct prison *pr = obj;
1613	struct prison *rpr;
1614
1615	prison_lock(pr);
1616	rpr = osd_jail_get(pr, msg_prison_slot);
1617	prison_unlock(pr);
1618	if (rpr == pr)
1619		msg_prison_cleanup(pr);
1620	return (0);
1621}
1622
1623static void
1624msg_prison_cleanup(struct prison *pr)
1625{
1626	struct msqid_kernel *msqkptr;
1627	int i;
1628
1629	/* Remove any msqs that belong to this jail. */
1630	mtx_lock(&msq_mtx);
1631	for (i = 0; i < msginfo.msgmni; i++) {
1632		msqkptr = &msqids[i];
1633		if (msqkptr->u.msg_qbytes != 0 &&
1634		    msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr)
1635			msq_remove(msqkptr);
1636	}
1637	mtx_unlock(&msq_mtx);
1638}
1639
/*
 * Declares the read-write "sysvmsg" jail parameter; this is the option
 * name that msg_prison_check(), msg_prison_set() and msg_prison_get()
 * look up in their option lists.
 */
SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
1641
1642#ifdef COMPAT_FREEBSD32
1643int
1644freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
1645{
1646
1647#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1648    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1649	switch (uap->which) {
1650	case 0:
1651		return (freebsd7_freebsd32_msgctl(td,
1652		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
1653	case 2:
1654		return (freebsd32_msgsnd(td,
1655		    (struct freebsd32_msgsnd_args *)&uap->a2));
1656	case 3:
1657		return (freebsd32_msgrcv(td,
1658		    (struct freebsd32_msgrcv_args *)&uap->a2));
1659	default:
1660		return (sys_msgsys(td, (struct msgsys_args *)uap));
1661	}
1662#else
1663	return (nosys(td, NULL));
1664#endif
1665}
1666
1667#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1668    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1669int
1670freebsd7_freebsd32_msgctl(struct thread *td,
1671    struct freebsd7_freebsd32_msgctl_args *uap)
1672{
1673	struct msqid_ds msqbuf;
1674	struct msqid_ds32_old msqbuf32;
1675	int error;
1676
1677	if (uap->cmd == IPC_SET) {
1678		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1679		if (error)
1680			return (error);
1681		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1682		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1683		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1684		CP(msqbuf32, msqbuf, msg_cbytes);
1685		CP(msqbuf32, msqbuf, msg_qnum);
1686		CP(msqbuf32, msqbuf, msg_qbytes);
1687		CP(msqbuf32, msqbuf, msg_lspid);
1688		CP(msqbuf32, msqbuf, msg_lrpid);
1689		CP(msqbuf32, msqbuf, msg_stime);
1690		CP(msqbuf32, msqbuf, msg_rtime);
1691		CP(msqbuf32, msqbuf, msg_ctime);
1692	}
1693	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1694	if (error)
1695		return (error);
1696	if (uap->cmd == IPC_STAT) {
1697		bzero(&msqbuf32, sizeof(msqbuf32));
1698		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1699		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1700		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1701		CP(msqbuf, msqbuf32, msg_cbytes);
1702		CP(msqbuf, msqbuf32, msg_qnum);
1703		CP(msqbuf, msqbuf32, msg_qbytes);
1704		CP(msqbuf, msqbuf32, msg_lspid);
1705		CP(msqbuf, msqbuf32, msg_lrpid);
1706		CP(msqbuf, msqbuf32, msg_stime);
1707		CP(msqbuf, msqbuf32, msg_rtime);
1708		CP(msqbuf, msqbuf32, msg_ctime);
1709		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1710	}
1711	return (error);
1712}
1713#endif
1714
1715int
1716freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
1717{
1718	struct msqid_ds msqbuf;
1719	struct msqid_ds32 msqbuf32;
1720	int error;
1721
1722	if (uap->cmd == IPC_SET) {
1723		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1724		if (error)
1725			return (error);
1726		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1727		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1728		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1729		CP(msqbuf32, msqbuf, msg_cbytes);
1730		CP(msqbuf32, msqbuf, msg_qnum);
1731		CP(msqbuf32, msqbuf, msg_qbytes);
1732		CP(msqbuf32, msqbuf, msg_lspid);
1733		CP(msqbuf32, msqbuf, msg_lrpid);
1734		CP(msqbuf32, msqbuf, msg_stime);
1735		CP(msqbuf32, msqbuf, msg_rtime);
1736		CP(msqbuf32, msqbuf, msg_ctime);
1737	}
1738	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1739	if (error)
1740		return (error);
1741	if (uap->cmd == IPC_STAT) {
1742		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1743		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1744		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1745		CP(msqbuf, msqbuf32, msg_cbytes);
1746		CP(msqbuf, msqbuf32, msg_qnum);
1747		CP(msqbuf, msqbuf32, msg_qbytes);
1748		CP(msqbuf, msqbuf32, msg_lspid);
1749		CP(msqbuf, msqbuf32, msg_lrpid);
1750		CP(msqbuf, msqbuf32, msg_stime);
1751		CP(msqbuf, msqbuf32, msg_rtime);
1752		CP(msqbuf, msqbuf32, msg_ctime);
1753		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1754	}
1755	return (error);
1756}
1757
1758int
1759freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
1760{
1761	const void *msgp;
1762	long mtype;
1763	int32_t mtype32;
1764	int error;
1765
1766	msgp = PTRIN(uap->msgp);
1767	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
1768		return (error);
1769	mtype = mtype32;
1770	return (kern_msgsnd(td, uap->msqid,
1771	    (const char *)msgp + sizeof(mtype32),
1772	    uap->msgsz, uap->msgflg, mtype));
1773}
1774
1775int
1776freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
1777{
1778	void *msgp;
1779	long mtype;
1780	int32_t mtype32;
1781	int error;
1782
1783	msgp = PTRIN(uap->msgp);
1784	if ((error = kern_msgrcv(td, uap->msqid,
1785	    (char *)msgp + sizeof(mtype32), uap->msgsz,
1786	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1787		return (error);
1788	mtype32 = (int32_t)mtype;
1789	return (copyout(&mtype32, msgp, sizeof(mtype32)));
1790}
1791#endif
1792
1793#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1794    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1795
1796/* XXX casting to (sy_call_t *) is bogus, as usual. */
1797static sy_call_t *msgcalls[] = {
1798	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
1799	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
1800};
1801
1802/*
1803 * Entry point for all MSG calls.
1804 */
1805int
1806sys_msgsys(td, uap)
1807	struct thread *td;
1808	/* XXX actually varargs. */
1809	struct msgsys_args /* {
1810		int	which;
1811		int	a2;
1812		int	a3;
1813		int	a4;
1814		int	a5;
1815		int	a6;
1816	} */ *uap;
1817{
1818	int error;
1819
1820	if (uap->which < 0 ||
1821	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
1822		return (EINVAL);
1823	error = (*msgcalls[uap->which])(td, &uap->a2);
1824	return (error);
1825}
1826
#ifndef CP
/* Copy the member fld from structure src to structure dst. */
#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
#endif
1830
#ifndef _SYS_SYSPROTO_H_
/*
 * Arguments of the old-ABI msgctl call handled by freebsd7_msgctl().
 * Only compiled when _SYS_SYSPROTO_H_ is not defined (presumably because
 * the sysproto header normally supplies this declaration -- TODO confirm).
 */
struct freebsd7_msgctl_args {
	int	msqid;			/* message queue identifier */
	int	cmd;			/* command, e.g. IPC_SET or IPC_STAT */
	struct	msqid_ds_old *buf;	/* user buffer in the old layout */
};
#endif
1838int
1839freebsd7_msgctl(td, uap)
1840	struct thread *td;
1841	struct freebsd7_msgctl_args *uap;
1842{
1843	struct msqid_ds_old msqold;
1844	struct msqid_ds msqbuf;
1845	int error;
1846
1847	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
1848	    uap->buf));
1849	if (uap->cmd == IPC_SET) {
1850		error = copyin(uap->buf, &msqold, sizeof(msqold));
1851		if (error)
1852			return (error);
1853		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
1854		CP(msqold, msqbuf, msg_first);
1855		CP(msqold, msqbuf, msg_last);
1856		CP(msqold, msqbuf, msg_cbytes);
1857		CP(msqold, msqbuf, msg_qnum);
1858		CP(msqold, msqbuf, msg_qbytes);
1859		CP(msqold, msqbuf, msg_lspid);
1860		CP(msqold, msqbuf, msg_lrpid);
1861		CP(msqold, msqbuf, msg_stime);
1862		CP(msqold, msqbuf, msg_rtime);
1863		CP(msqold, msqbuf, msg_ctime);
1864	}
1865	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1866	if (error)
1867		return (error);
1868	if (uap->cmd == IPC_STAT) {
1869		bzero(&msqold, sizeof(msqold));
1870		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
1871		CP(msqbuf, msqold, msg_first);
1872		CP(msqbuf, msqold, msg_last);
1873		CP(msqbuf, msqold, msg_cbytes);
1874		CP(msqbuf, msqold, msg_qnum);
1875		CP(msqbuf, msqold, msg_qbytes);
1876		CP(msqbuf, msqold, msg_lspid);
1877		CP(msqbuf, msqold, msg_lrpid);
1878		CP(msqbuf, msqold, msg_stime);
1879		CP(msqbuf, msqold, msg_rtime);
1880		CP(msqbuf, msqold, msg_ctime);
1881		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
1882	}
1883	return (error);
1884}
1885
1886#undef CP
1887
1888#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1889	   COMPAT_FREEBSD7 */
1890