1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: stable/11/sys/kern/sysv_msg.c 331643 2018-03-27 18:52:27Z dim $");
52
53#include "opt_compat.h"
54#include "opt_sysvipc.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/priv.h>
61#include <sys/proc.h>
62#include <sys/lock.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/mount.h>
66#include <sys/msg.h>
67#include <sys/racct.h>
68#include <sys/sx.h>
69#include <sys/syscall.h>
70#include <sys/syscallsubr.h>
71#include <sys/sysent.h>
72#include <sys/sysctl.h>
73#include <sys/malloc.h>
74#include <sys/jail.h>
75
76#include <security/mac/mac_framework.h>
77
78FEATURE(sysv_msg, "System V message queues support");
79
80static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
81
82static int msginit(void);
83static int msgunload(void);
84static int sysvmsg_modload(struct module *, int, void *);
85static void msq_remove(struct msqid_kernel *);
86static struct prison *msg_find_prison(struct ucred *);
87static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
88static int msg_prison_check(void *, void *);
89static int msg_prison_set(void *, void *);
90static int msg_prison_get(void *, void *);
91static int msg_prison_remove(void *, void *);
92static void msg_prison_cleanup(struct prison *);
93
94
/*
 * Debug tracing.  DPRINTF takes one fully parenthesized printf argument
 * list, e.g. DPRINTF(("x=%d\n", x)).  Unless MSG_DEBUG is defined it
 * expands to a no-op and its arguments are never evaluated.
 */
#ifndef MSG_DEBUG
#define DPRINTF(a)	(void)0
#else
#define DPRINTF(a)	printf a
#endif

/* Returns a message header and its segments to the free lists. */
static void msg_freehdr(struct msg *msghdr);
102
/*
 * Compile-time defaults for the message queue limits.  Each one except
 * MSGMAX may be overridden by defining it before this point.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif
/* Largest single message: every segment in the pool. */
#define MSGMAX	(MSGSSZ*MSGSEG)
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
119
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
 */
130struct msginfo msginfo = {
131                MSGMAX,         /* max chars in a message */
132                MSGMNI,         /* # of message queue identifiers */
133                MSGMNB,         /* max chars in a queue */
134                MSGTQL,         /* max messages in system */
135                MSGSSZ,         /* size of a message segment */
136                		/* (must be small power of 2 greater than 4) */
137                MSGSEG          /* number of message segments */
138};
139
140/*
141 * macros to convert between msqid_ds's and msqid's.
142 * (specific to this implementation)
143 */
/* Build an id: slot index in the low 16 bits, sequence number above it. */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
/* Extract the slot index (low 16 bits) from an id. */
#define MSQID_IX(id)	((id) & 0xffff)
/* Extract the sequence number (bits 16..31) from an id. */
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
147
148/*
149 * The rest of this file is specific to this particular implementation.
150 */
151
/*
 * One entry per pool segment.  Entries are chained through 'next' either
 * on the free list or on the segment chain of a single message.
 */
struct msgmap {
	short	next;		/* index of the following segment, */
				/* 0..(MSGSEG-1), or -1 at the end of a chain */
};
157
158#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
159
160static int nfree_msgmaps;	/* # of free map entries */
161static short free_msgmaps;	/* head of linked list of free map entries */
162static struct msg *free_msghdrs;/* list of free msg headers */
163static char *msgpool;		/* MSGMAX byte long msg buffer pool */
164static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
165static struct msg *msghdrs;	/* MSGTQL msg headers */
166static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
167static struct mtx msq_mtx;	/* global mutex for message queues. */
168static unsigned msg_prison_slot;/* prison OSD slot */
169
170static struct syscall_helper_data msg_syscalls[] = {
171	SYSCALL_INIT_HELPER(msgctl),
172	SYSCALL_INIT_HELPER(msgget),
173	SYSCALL_INIT_HELPER(msgsnd),
174	SYSCALL_INIT_HELPER(msgrcv),
175#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
176    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
177	SYSCALL_INIT_HELPER(msgsys),
178	SYSCALL_INIT_HELPER_COMPAT(freebsd7_msgctl),
179#endif
180	SYSCALL_INIT_LAST
181};
182
183#ifdef COMPAT_FREEBSD32
184#include <compat/freebsd32/freebsd32.h>
185#include <compat/freebsd32/freebsd32_ipc.h>
186#include <compat/freebsd32/freebsd32_proto.h>
187#include <compat/freebsd32/freebsd32_signal.h>
188#include <compat/freebsd32/freebsd32_syscall.h>
189#include <compat/freebsd32/freebsd32_util.h>
190
191static struct syscall_helper_data msg32_syscalls[] = {
192	SYSCALL32_INIT_HELPER(freebsd32_msgctl),
193	SYSCALL32_INIT_HELPER(freebsd32_msgsnd),
194	SYSCALL32_INIT_HELPER(freebsd32_msgrcv),
195	SYSCALL32_INIT_HELPER_COMPAT(msgget),
196	SYSCALL32_INIT_HELPER(freebsd32_msgsys),
197#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
198    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
199	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_msgctl),
200#endif
201	SYSCALL_INIT_LAST
202};
203#endif
204
205static int
206msginit()
207{
208	struct prison *pr;
209	void **rsv;
210	int i, error;
211	osd_method_t methods[PR_MAXMETHOD] = {
212	    [PR_METHOD_CHECK] =		msg_prison_check,
213	    [PR_METHOD_SET] =		msg_prison_set,
214	    [PR_METHOD_GET] =		msg_prison_get,
215	    [PR_METHOD_REMOVE] =	msg_prison_remove,
216	};
217
218	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
219	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
220	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
221	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
222	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
223	    M_WAITOK | M_ZERO);
224
225	/*
226	 * msginfo.msgssz should be a power of two for efficiency reasons.
227	 * It is also pretty silly if msginfo.msgssz is less than 8
228	 * or greater than about 256 so ...
229	 */
230
231	i = 8;
232	while (i < 1024 && i != msginfo.msgssz)
233		i <<= 1;
234    	if (i != msginfo.msgssz) {
235		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
236		    msginfo.msgssz));
237		panic("msginfo.msgssz not a small power of 2");
238	}
239
240	if (msginfo.msgseg > 32767) {
241		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
242		panic("msginfo.msgseg > 32767");
243	}
244
245	for (i = 0; i < msginfo.msgseg; i++) {
246		if (i > 0)
247			msgmaps[i-1].next = i;
248		msgmaps[i].next = -1;	/* implies entry is available */
249	}
250	free_msgmaps = 0;
251	nfree_msgmaps = msginfo.msgseg;
252
253	for (i = 0; i < msginfo.msgtql; i++) {
254		msghdrs[i].msg_type = 0;
255		if (i > 0)
256			msghdrs[i-1].msg_next = &msghdrs[i];
257		msghdrs[i].msg_next = NULL;
258#ifdef MAC
259		mac_sysvmsg_init(&msghdrs[i]);
260#endif
261    	}
262	free_msghdrs = &msghdrs[0];
263
264	for (i = 0; i < msginfo.msgmni; i++) {
265		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
266		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
267		msqids[i].u.msg_perm.mode = 0;
268#ifdef MAC
269		mac_sysvmsq_init(&msqids[i]);
270#endif
271	}
272	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
273
274	/* Set current prisons according to their allow.sysvipc. */
275	msg_prison_slot = osd_jail_register(NULL, methods);
276	rsv = osd_reserve(msg_prison_slot);
277	prison_lock(&prison0);
278	(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
279	prison_unlock(&prison0);
280	rsv = NULL;
281	sx_slock(&allprison_lock);
282	TAILQ_FOREACH(pr, &allprison, pr_list) {
283		if (rsv == NULL)
284			rsv = osd_reserve(msg_prison_slot);
285		prison_lock(pr);
286		if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
287			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
288			    &prison0);
289			rsv = NULL;
290		}
291		prison_unlock(pr);
292	}
293	if (rsv != NULL)
294		osd_free_reserved(rsv);
295	sx_sunlock(&allprison_lock);
296
297	error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD);
298	if (error != 0)
299		return (error);
300#ifdef COMPAT_FREEBSD32
301	error = syscall32_helper_register(msg32_syscalls, SY_THR_STATIC_KLD);
302	if (error != 0)
303		return (error);
304#endif
305	return (0);
306}
307
308static int
309msgunload()
310{
311	struct msqid_kernel *msqkptr;
312	int msqid;
313#ifdef MAC
314	int i;
315#endif
316
317	syscall_helper_unregister(msg_syscalls);
318#ifdef COMPAT_FREEBSD32
319	syscall32_helper_unregister(msg32_syscalls);
320#endif
321
322	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
323		msqkptr = &msqids[msqid];
324		if (msqkptr->u.msg_qbytes != 0 ||
325		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
326			break;
327	}
328	if (msqid != msginfo.msgmni)
329		return (EBUSY);
330
331	if (msg_prison_slot != 0)
332		osd_jail_deregister(msg_prison_slot);
333#ifdef MAC
334	for (i = 0; i < msginfo.msgtql; i++)
335		mac_sysvmsg_destroy(&msghdrs[i]);
336	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
337		mac_sysvmsq_destroy(&msqids[msqid]);
338#endif
339	free(msgpool, M_MSG);
340	free(msgmaps, M_MSG);
341	free(msghdrs, M_MSG);
342	free(msqids, M_MSG);
343	mtx_destroy(&msq_mtx);
344	return (0);
345}
346
347
348static int
349sysvmsg_modload(struct module *module, int cmd, void *arg)
350{
351	int error = 0;
352
353	switch (cmd) {
354	case MOD_LOAD:
355		error = msginit();
356		if (error != 0)
357			msgunload();
358		break;
359	case MOD_UNLOAD:
360		error = msgunload();
361		break;
362	case MOD_SHUTDOWN:
363		break;
364	default:
365		error = EINVAL;
366		break;
367	}
368	return (error);
369}
370
371static moduledata_t sysvmsg_mod = {
372	"sysvmsg",
373	&sysvmsg_modload,
374	NULL
375};
376
377DECLARE_MODULE(sysvmsg, sysvmsg_mod, SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
378MODULE_VERSION(sysvmsg, 1);
379
380static void
381msg_freehdr(msghdr)
382	struct msg *msghdr;
383{
384	while (msghdr->msg_ts > 0) {
385		short next;
386		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
387			panic("msghdr->msg_spot out of range");
388		next = msgmaps[msghdr->msg_spot].next;
389		msgmaps[msghdr->msg_spot].next = free_msgmaps;
390		free_msgmaps = msghdr->msg_spot;
391		nfree_msgmaps++;
392		msghdr->msg_spot = next;
393		if (msghdr->msg_ts >= msginfo.msgssz)
394			msghdr->msg_ts -= msginfo.msgssz;
395		else
396			msghdr->msg_ts = 0;
397	}
398	if (msghdr->msg_spot != -1)
399		panic("msghdr->msg_spot != -1");
400	msghdr->msg_next = free_msghdrs;
401	free_msghdrs = msghdr;
402#ifdef MAC
403	mac_sysvmsg_cleanup(msghdr);
404#endif
405}
406
407static void
408msq_remove(struct msqid_kernel *msqkptr)
409{
410	struct msg *msghdr;
411
412	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
413	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
414	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
415	crfree(msqkptr->cred);
416	msqkptr->cred = NULL;
417
418	/* Free the message headers */
419	msghdr = msqkptr->u.msg_first;
420	while (msghdr != NULL) {
421		struct msg *msghdr_tmp;
422
423		/* Free the segments of each message */
424		msqkptr->u.msg_cbytes -= msghdr->msg_ts;
425		msqkptr->u.msg_qnum--;
426		msghdr_tmp = msghdr;
427		msghdr = msghdr->msg_next;
428		msg_freehdr(msghdr_tmp);
429	}
430
431	if (msqkptr->u.msg_cbytes != 0)
432		panic("msg_cbytes is screwed up");
433	if (msqkptr->u.msg_qnum != 0)
434		panic("msg_qnum is screwed up");
435
436	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
437
438#ifdef MAC
439	mac_sysvmsq_cleanup(msqkptr);
440#endif
441
442	wakeup(msqkptr);
443}
444
445static struct prison *
446msg_find_prison(struct ucred *cred)
447{
448	struct prison *pr, *rpr;
449
450	pr = cred->cr_prison;
451	prison_lock(pr);
452	rpr = osd_jail_get(pr, msg_prison_slot);
453	prison_unlock(pr);
454	return rpr;
455}
456
457static int
458msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
459{
460
461	if (msqkptr->cred == NULL ||
462	    !(rpr == msqkptr->cred->cr_prison ||
463	      prison_ischild(rpr, msqkptr->cred->cr_prison)))
464		return (EINVAL);
465	return (0);
466}
467
468#ifndef _SYS_SYSPROTO_H_
469struct msgctl_args {
470	int	msqid;
471	int	cmd;
472	struct	msqid_ds *buf;
473};
474#endif
475int
476sys_msgctl(struct thread *td, struct msgctl_args *uap)
477{
478	int msqid = uap->msqid;
479	int cmd = uap->cmd;
480	struct msqid_ds msqbuf;
481	int error;
482
483	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
484	if (cmd == IPC_SET &&
485	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
486		return (error);
487	error = kern_msgctl(td, msqid, cmd, &msqbuf);
488	if (cmd == IPC_STAT && error == 0)
489		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
490	return (error);
491}
492
493int
494kern_msgctl(td, msqid, cmd, msqbuf)
495	struct thread *td;
496	int msqid;
497	int cmd;
498	struct msqid_ds *msqbuf;
499{
500	int rval, error, msqix;
501	struct msqid_kernel *msqkptr;
502	struct prison *rpr;
503
504	rpr = msg_find_prison(td->td_ucred);
505	if (rpr == NULL)
506		return (ENOSYS);
507
508	msqix = IPCID_TO_IX(msqid);
509
510	if (msqix < 0 || msqix >= msginfo.msgmni) {
511		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
512		    msginfo.msgmni));
513		return (EINVAL);
514	}
515
516	msqkptr = &msqids[msqix];
517
518	mtx_lock(&msq_mtx);
519	if (msqkptr->u.msg_qbytes == 0) {
520		DPRINTF(("no such msqid\n"));
521		error = EINVAL;
522		goto done2;
523	}
524	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
525		DPRINTF(("wrong sequence number\n"));
526		error = EINVAL;
527		goto done2;
528	}
529
530	error = msq_prison_cansee(rpr, msqkptr);
531	if (error != 0) {
532		DPRINTF(("requester can't see prison\n"));
533		goto done2;
534	}
535
536#ifdef MAC
537	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
538	if (error != 0)
539		goto done2;
540#endif
541
542	error = 0;
543	rval = 0;
544
545	switch (cmd) {
546
547	case IPC_RMID:
548	{
549#ifdef MAC
550		struct msg *msghdr;
551#endif
552		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
553			goto done2;
554
555#ifdef MAC
556		/*
557		 * Check that the thread has MAC access permissions to
558		 * individual msghdrs.  Note: We need to do this in a
559		 * separate loop because the actual loop alters the
560		 * msq/msghdr info as it progresses, and there is no going
561		 * back if half the way through we discover that the
562		 * thread cannot free a certain msghdr.  The msq will get
563		 * into an inconsistent state.
564		 */
565		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
566		    msghdr = msghdr->msg_next) {
567			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
568			if (error != 0)
569				goto done2;
570		}
571#endif
572
573		msq_remove(msqkptr);
574	}
575
576		break;
577
578	case IPC_SET:
579		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
580			goto done2;
581		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
582			error = priv_check(td, PRIV_IPC_MSGSIZE);
583			if (error)
584				goto done2;
585		}
586		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
587			DPRINTF(("can't increase msg_qbytes beyond %d"
588			    "(truncating)\n", msginfo.msgmnb));
589			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
590		}
591		if (msqbuf->msg_qbytes == 0) {
592			DPRINTF(("can't reduce msg_qbytes to 0\n"));
593			error = EINVAL;		/* non-standard errno! */
594			goto done2;
595		}
596		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
597		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
598		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
599		    (msqbuf->msg_perm.mode & 0777);
600		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
601		msqkptr->u.msg_ctime = time_second;
602		break;
603
604	case IPC_STAT:
605		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
606			DPRINTF(("requester doesn't have read access\n"));
607			goto done2;
608		}
609		*msqbuf = msqkptr->u;
610		if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
611			msqbuf->msg_perm.key = IPC_PRIVATE;
612		break;
613
614	default:
615		DPRINTF(("invalid command %d\n", cmd));
616		error = EINVAL;
617		goto done2;
618	}
619
620	if (error == 0)
621		td->td_retval[0] = rval;
622done2:
623	mtx_unlock(&msq_mtx);
624	return (error);
625}
626
627#ifndef _SYS_SYSPROTO_H_
628struct msgget_args {
629	key_t	key;
630	int	msgflg;
631};
632#endif
633
634int
635sys_msgget(struct thread *td, struct msgget_args *uap)
636{
637	int msqid, error = 0;
638	int key = uap->key;
639	int msgflg = uap->msgflg;
640	struct ucred *cred = td->td_ucred;
641	struct msqid_kernel *msqkptr = NULL;
642
643	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
644
645	if (msg_find_prison(cred) == NULL)
646		return (ENOSYS);
647
648	mtx_lock(&msq_mtx);
649	if (key != IPC_PRIVATE) {
650		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
651			msqkptr = &msqids[msqid];
652			if (msqkptr->u.msg_qbytes != 0 &&
653			    msqkptr->cred != NULL &&
654			    msqkptr->cred->cr_prison == cred->cr_prison &&
655			    msqkptr->u.msg_perm.key == key)
656				break;
657		}
658		if (msqid < msginfo.msgmni) {
659			DPRINTF(("found public key\n"));
660			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
661				DPRINTF(("not exclusive\n"));
662				error = EEXIST;
663				goto done2;
664			}
665			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
666			    msgflg & 0700))) {
667				DPRINTF(("requester doesn't have 0%o access\n",
668				    msgflg & 0700));
669				goto done2;
670			}
671#ifdef MAC
672			error = mac_sysvmsq_check_msqget(cred, msqkptr);
673			if (error != 0)
674				goto done2;
675#endif
676			goto found;
677		}
678	}
679
680	DPRINTF(("need to allocate the msqid_ds\n"));
681	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
682		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
683			/*
684			 * Look for an unallocated and unlocked msqid_ds.
685			 * msqid_ds's can be locked by msgsnd or msgrcv while
686			 * they are copying the message in/out.  We can't
687			 * re-use the entry until they release it.
688			 */
689			msqkptr = &msqids[msqid];
690			if (msqkptr->u.msg_qbytes == 0 &&
691			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
692				break;
693		}
694		if (msqid == msginfo.msgmni) {
695			DPRINTF(("no more msqid_ds's available\n"));
696			error = ENOSPC;
697			goto done2;
698		}
699#ifdef RACCT
700		if (racct_enable) {
701			PROC_LOCK(td->td_proc);
702			error = racct_add(td->td_proc, RACCT_NMSGQ, 1);
703			PROC_UNLOCK(td->td_proc);
704			if (error != 0) {
705				error = ENOSPC;
706				goto done2;
707			}
708		}
709#endif
710		DPRINTF(("msqid %d is available\n", msqid));
711		msqkptr->u.msg_perm.key = key;
712		msqkptr->u.msg_perm.cuid = cred->cr_uid;
713		msqkptr->u.msg_perm.uid = cred->cr_uid;
714		msqkptr->u.msg_perm.cgid = cred->cr_gid;
715		msqkptr->u.msg_perm.gid = cred->cr_gid;
716		msqkptr->u.msg_perm.mode = (msgflg & 0777);
717		msqkptr->cred = crhold(cred);
718		/* Make sure that the returned msqid is unique */
719		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
720		msqkptr->u.msg_first = NULL;
721		msqkptr->u.msg_last = NULL;
722		msqkptr->u.msg_cbytes = 0;
723		msqkptr->u.msg_qnum = 0;
724		msqkptr->u.msg_qbytes = msginfo.msgmnb;
725		msqkptr->u.msg_lspid = 0;
726		msqkptr->u.msg_lrpid = 0;
727		msqkptr->u.msg_stime = 0;
728		msqkptr->u.msg_rtime = 0;
729		msqkptr->u.msg_ctime = time_second;
730#ifdef MAC
731		mac_sysvmsq_create(cred, msqkptr);
732#endif
733	} else {
734		DPRINTF(("didn't find it and wasn't asked to create it\n"));
735		error = ENOENT;
736		goto done2;
737	}
738
739found:
740	/* Construct the unique msqid */
741	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
742done2:
743	mtx_unlock(&msq_mtx);
744	return (error);
745}
746
747#ifndef _SYS_SYSPROTO_H_
748struct msgsnd_args {
749	int	msqid;
750	const void	*msgp;	/* XXX msgp is actually mtext. */
751	size_t	msgsz;
752	int	msgflg;
753};
754#endif
/*
 * Kernel-side implementation of msgsnd(2): append one message to a queue.
 *
 * td      calling thread; its credential is used for jail, permission,
 *         MAC and resource-accounting checks.
 * msqid   queue identifier (slot index plus sequence number).
 * msgp    user-space address of the message text (the type has already
 *         been split off by the caller and arrives in mtype).
 * msgsz   length of the message text in bytes.
 * msgflg  IPC_NOWAIT makes the call fail with EAGAIN instead of sleeping.
 * mtype   message type; must be >= 1.
 *
 * The function waits until the queue is not MSG_LOCKED, the queue has room
 * for msgsz more bytes, enough free segments exist and a free header
 * exists.  It then marks the queue MSG_LOCKED, takes a header and segments
 * off the free lists, copies the text in segment by segment (dropping
 * msq_mtx around each copyin), clears MSG_LOCKED and links the message at
 * the tail of the queue.
 *
 * Returns 0 on success (td->td_retval[0] is set to 0).  Errors produced
 * here: ENOSYS (no message queue support in this jail), EINVAL (bad index,
 * unallocated slot, stale sequence number, queue not visible from the
 * caller's jail, msgsz > msg_qbytes, mtype < 1), EAGAIN (resource
 * accounting refused, or IPC_NOWAIT and resources unavailable), EINTR
 * (sleep interrupted), EIDRM (queue removed while waiting or copying),
 * plus whatever ipcperm(), copyin() or the MAC hooks return.
 */
755int
756kern_msgsnd(struct thread *td, int msqid, const void *msgp,
757    size_t msgsz, int msgflg, long mtype)
758{
759	int msqix, segs_needed, error = 0;
760	struct msqid_kernel *msqkptr;
761	struct msg *msghdr;
762	struct prison *rpr;
763	short next;
764#ifdef RACCT
	/* msgsz is consumed by the copy loop; keep the original for done3. */
765	size_t saved_msgsz;
766#endif
767
768	rpr = msg_find_prison(td->td_ucred);
769	if (rpr == NULL)
770		return (ENOSYS);
771
772	mtx_lock(&msq_mtx);
773	msqix = IPCID_TO_IX(msqid);
774
775	if (msqix < 0 || msqix >= msginfo.msgmni) {
776		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
777		    msginfo.msgmni));
778		error = EINVAL;
779		goto done2;
780	}
781
782	msqkptr = &msqids[msqix];
	/* msg_qbytes == 0 marks a free slot. */
783	if (msqkptr->u.msg_qbytes == 0) {
784		DPRINTF(("no such message queue id\n"));
785		error = EINVAL;
786		goto done2;
787	}
788	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
789		DPRINTF(("wrong sequence number\n"));
790		error = EINVAL;
791		goto done2;
792	}
793
794	if ((error = msq_prison_cansee(rpr, msqkptr))) {
795		DPRINTF(("requester can't see prison\n"));
796		goto done2;
797	}
798
799	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
800		DPRINTF(("requester doesn't have write access\n"));
801		goto done2;
802	}
803
804#ifdef MAC
805	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
806	if (error != 0)
807		goto done2;
808#endif
809
	/*
	 * Charge the sending process for one queued message and msgsz bytes
	 * up front.  Failures here leave through done2 (nothing, or only
	 * what was just undone, is charged); every later failure leaves
	 * through done3, which refunds both charges.
	 */
810#ifdef RACCT
811	if (racct_enable) {
812		PROC_LOCK(td->td_proc);
813		if (racct_add(td->td_proc, RACCT_MSGQQUEUED, 1)) {
814			PROC_UNLOCK(td->td_proc);
815			error = EAGAIN;
816			goto done2;
817		}
818		saved_msgsz = msgsz;
819		if (racct_add(td->td_proc, RACCT_MSGQSIZE, msgsz)) {
820			racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
821			PROC_UNLOCK(td->td_proc);
822			error = EAGAIN;
823			goto done2;
824		}
825		PROC_UNLOCK(td->td_proc);
826	}
827#endif
828
	/* Number of msgssz-byte segments the text occupies (howmany()). */
829	segs_needed = howmany(msgsz, msginfo.msgssz);
830	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
831	    msginfo.msgssz, segs_needed));
	/* Wait until everything needed for the send is available. */
832	for (;;) {
833		int need_more_resources = 0;
834
835		/*
836		 * check msgsz
837		 * (inside this loop in case msg_qbytes changes while we sleep)
838		 */
839
840		if (msgsz > msqkptr->u.msg_qbytes) {
841			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
842			error = EINVAL;
843			goto done3;
844		}
845
846		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
847			DPRINTF(("msqid is locked\n"));
848			need_more_resources = 1;
849		}
850		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
851			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
852			need_more_resources = 1;
853		}
854		if (segs_needed > nfree_msgmaps) {
855			DPRINTF(("segs_needed > nfree_msgmaps\n"));
856			need_more_resources = 1;
857		}
858		if (free_msghdrs == NULL) {
859			DPRINTF(("no more msghdrs\n"));
860			need_more_resources = 1;
861		}
862
863		if (need_more_resources) {
864			int we_own_it;
865
866			if ((msgflg & IPC_NOWAIT) != 0) {
867				DPRINTF(("need more resources but caller "
868				    "doesn't want to wait\n"));
869				error = EAGAIN;
870				goto done3;
871			}
872
873			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
874				DPRINTF(("we don't own the msqid_ds\n"));
875				we_own_it = 0;
876			} else {
877				/* Force later arrivals to wait for our
878				   request */
879				DPRINTF(("we own the msqid_ds\n"));
880				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
881				we_own_it = 1;
882			}
883			DPRINTF(("msgsnd:  goodnight\n"));
			/*
			 * Sleep on the queue with a timeout of hz; msq_mtx is
			 * the interlock passed to msleep().
			 */
884			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
885			    "msgsnd", hz);
886			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
887			if (we_own_it)
888				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
			/* A timeout simply re-evaluates the conditions above. */
889			if (error == EWOULDBLOCK) {
890				DPRINTF(("msgsnd:  timed out\n"));
891				continue;
892			}
			/* Any other msleep() error is reported as EINTR. */
893			if (error != 0) {
894				DPRINTF(("msgsnd:  interrupted system call\n"));
895				error = EINTR;
896				goto done3;
897			}
898
899			/*
900			 * Make sure that the msq queue still exists
901			 */
902
903			if (msqkptr->u.msg_qbytes == 0) {
904				DPRINTF(("msqid deleted\n"));
905				error = EIDRM;
906				goto done3;
907			}
908
909		} else {
910			DPRINTF(("got all the resources that we need\n"));
911			break;
912		}
913	}
914
915	/*
916	 * We have the resources that we need.
917	 * Make sure!
918	 */
919
920	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
921		panic("msg_perm.mode & MSG_LOCKED");
922	if (segs_needed > nfree_msgmaps)
923		panic("segs_needed > nfree_msgmaps");
924	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
925		panic("msgsz + msg_cbytes > msg_qbytes");
926	if (free_msghdrs == NULL)
927		panic("no more msghdrs");
928
929	/*
930	 * Re-lock the msqid_ds in case we page-fault when copying in the
931	 * message
932	 */
933
934	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
935		panic("msqid_ds is already locked");
936	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
937
938	/*
939	 * Allocate a message header
940	 */
941
942	msghdr = free_msghdrs;
943	free_msghdrs = msghdr->msg_next;
	/* -1 terminates the (still empty) segment chain. */
944	msghdr->msg_spot = -1;
945	msghdr->msg_ts = msgsz;
946	msghdr->msg_type = mtype;
947#ifdef MAC
948	/*
949	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
950	 * immediately?  Or, should it be checked just before the msg is
951	 * enqueued in the msgq (as it is done now)?
952	 */
953	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
954#endif
955
956	/*
957	 * Allocate space for the message
958	 */
959
	/*
	 * Each pass pops the head of the free segment list and pushes it on
	 * the front of this message's chain, which msg_spot heads.
	 */
960	while (segs_needed > 0) {
961		if (nfree_msgmaps <= 0)
962			panic("not enough msgmaps");
963		if (free_msgmaps == -1)
964			panic("nil free_msgmaps");
965		next = free_msgmaps;
966		if (next <= -1)
967			panic("next too low #1");
968		if (next >= msginfo.msgseg)
969			panic("next out of range #1");
970		DPRINTF(("allocating segment %d to message\n", next));
971		free_msgmaps = msgmaps[next].next;
972		nfree_msgmaps--;
973		msgmaps[next].next = msghdr->msg_spot;
974		msghdr->msg_spot = next;
975		segs_needed--;
976	}
977
978	/*
979	 * Validate the message type
980	 */
981
	/* On failure, give back the header and segments and drop the lock flag. */
982	if (msghdr->msg_type < 1) {
983		msg_freehdr(msghdr);
984		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
985		wakeup(msqkptr);
986		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
987		error = EINVAL;
988		goto done3;
989	}
990
991	/*
992	 * Copy in the message body
993	 */
994
	/*
	 * One segment per pass, at most msgssz bytes each.  msq_mtx is
	 * released around copyin() and retaken afterwards; MSG_LOCKED, set
	 * above, stays on for the whole copy.
	 */
995	next = msghdr->msg_spot;
996	while (msgsz > 0) {
997		size_t tlen;
998		if (msgsz > msginfo.msgssz)
999			tlen = msginfo.msgssz;
1000		else
1001			tlen = msgsz;
1002		if (next <= -1)
1003			panic("next too low #2");
1004		if (next >= msginfo.msgseg)
1005			panic("next out of range #2");
1006		mtx_unlock(&msq_mtx);
1007		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
1008		    tlen)) != 0) {
1009			mtx_lock(&msq_mtx);
1010			DPRINTF(("error %d copying in message segment\n",
1011			    error));
1012			msg_freehdr(msghdr);
1013			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1014			wakeup(msqkptr);
1015			goto done3;
1016		}
1017		mtx_lock(&msq_mtx);
1018		msgsz -= tlen;
1019		msgp = (const char *)msgp + tlen;
1020		next = msgmaps[next].next;
1021	}
1022	if (next != -1)
1023		panic("didn't use all the msg segments");
1024
1025	/*
1026	 * We've got the message.  Unlock the msqid_ds.
1027	 */
1028
1029	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
1030
1031	/*
1032	 * Make sure that the msqid_ds is still allocated.
1033	 */
1034
1035	if (msqkptr->u.msg_qbytes == 0) {
1036		msg_freehdr(msghdr);
1037		wakeup(msqkptr);
1038		error = EIDRM;
1039		goto done3;
1040	}
1041
1042#ifdef MAC
1043	/*
1044	 * Note: Since the task/thread allocates the msghdr and usually
1045	 * primes it with its own MAC label, for a majority of policies, it
1046	 * won't be necessary to check whether the msghdr has access
1047	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
1048	 * suffice in that case.  However, this hook may be required where
1049	 * individual policies derive a non-identical label for the msghdr
1050	 * from the current thread label and may want to check the msghdr
1051	 * enqueue permissions, along with read/write permissions to the
1052	 * msgq.
1053	 */
1054	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
1055	if (error != 0) {
1056		msg_freehdr(msghdr);
1057		wakeup(msqkptr);
1058		goto done3;
1059	}
1060#endif
1061
1062	/*
1063	 * Put the message into the queue
1064	 */
1065	if (msqkptr->u.msg_first == NULL) {
1066		msqkptr->u.msg_first = msghdr;
1067		msqkptr->u.msg_last = msghdr;
1068	} else {
1069		msqkptr->u.msg_last->msg_next = msghdr;
1070		msqkptr->u.msg_last = msghdr;
1071	}
1072	msqkptr->u.msg_last->msg_next = NULL;
1073
	/* Update the queue statistics for the message just linked in. */
1074	msqkptr->u.msg_cbytes += msghdr->msg_ts;
1075	msqkptr->u.msg_qnum++;
1076	msqkptr->u.msg_lspid = td->td_proc->p_pid;
1077	msqkptr->u.msg_stime = time_second;
1078
	/* Wake every thread sleeping on this queue. */
1079	wakeup(msqkptr);
1080	td->td_retval[0] = 0;
	/* done3: reached on success and on every failure after accounting. */
1081done3:
1082#ifdef RACCT
	/* Refund both up-front charges when the send did not happen. */
1083	if (racct_enable && error != 0) {
1084		PROC_LOCK(td->td_proc);
1085		racct_sub(td->td_proc, RACCT_MSGQQUEUED, 1);
1086		racct_sub(td->td_proc, RACCT_MSGQSIZE, saved_msgsz);
1087		PROC_UNLOCK(td->td_proc);
1088	}
1089#endif
1090done2:
1091	mtx_unlock(&msq_mtx);
1092	return (error);
1093}
1094
1095int
1096sys_msgsnd(struct thread *td, struct msgsnd_args *uap)
1097{
1098	int error;
1099	long mtype;
1100
1101	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
1102	    uap->msgsz, uap->msgflg));
1103
1104	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
1105		DPRINTF(("error %d copying the message type\n", error));
1106		return (error);
1107	}
1108	return (kern_msgsnd(td, uap->msqid,
1109	    (const char *)uap->msgp + sizeof(mtype),
1110	    uap->msgsz, uap->msgflg, mtype));
1111}
1112
#ifndef _SYS_SYSPROTO_H_
/* Arguments of the msgrcv system call, as consumed by sys_msgrcv(). */
struct msgrcv_args {
	int	msqid;		/* message queue identifier */
	void	*msgp;		/* user buffer: a long type, then the text */
	size_t	msgsz;		/* size limit for the text portion, in bytes */
	long	msgtyp;		/* type selector passed to kern_msgrcv() */
	int	msgflg;		/* flags passed to kern_msgrcv() */
};
#endif
/*
 * Receive one message from the queue identified by msqid.
 *
 * XXX msgp is actually mtext: it points at the user-space destination for
 * the message text only.  The message type is handed back through *mtype
 * and is not copied out here.
 *
 * msgtyp selects the message: 0 takes the first message on the queue;
 * otherwise the first message whose type equals msgtyp, or is less than or
 * equal to -msgtyp, is taken.  A selected message longer than msgsz fails
 * with E2BIG (and stays queued) unless MSG_NOERROR is set, in which case
 * only msgsz bytes are returned.  When nothing matches, the call fails
 * with ENOMSG if IPC_NOWAIT is set and otherwise sleeps on the queue and
 * retries after being woken.
 *
 * On success returns 0 and stores the number of text bytes copied out in
 * td->td_retval[0].  Otherwise returns ENOSYS (msg_find_prison() found no
 * prison), EINVAL (index out of range, unused slot or sequence mismatch),
 * EINTR (msleep() returned an error), EIDRM (queue gone or recycled after
 * the sleep), or the error reported by msq_prison_cansee(), ipcperm(),
 * the MAC checks or copyout().
 */
int
kern_msgrcv(struct thread *td, int msqid, void *msgp, size_t msgsz, long msgtyp,
    int msgflg, long *mtype)
{
	size_t len;
	struct msqid_kernel *msqkptr;
	struct msg *msghdr;
	struct prison *rpr;
	int msqix, error = 0;
	short next;

	rpr = msg_find_prison(td->td_ucred);
	if (rpr == NULL)
		return (ENOSYS);

	msqix = IPCID_TO_IX(msqid);

	if (msqix < 0 || msqix >= msginfo.msgmni) {
		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
		    msginfo.msgmni));
		return (EINVAL);
	}

	msqkptr = &msqids[msqix];
	mtx_lock(&msq_mtx);
	/* A slot whose msg_qbytes is 0 is treated as "no such queue". */
	if (msqkptr->u.msg_qbytes == 0) {
		DPRINTF(("no such message queue id\n"));
		error = EINVAL;
		goto done2;
	}
	/* The sequence number embedded in the id must match the slot's. */
	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
		DPRINTF(("wrong sequence number\n"));
		error = EINVAL;
		goto done2;
	}

	if ((error = msq_prison_cansee(rpr, msqkptr))) {
		DPRINTF(("requester can't see prison\n"));
		goto done2;
	}

	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
		DPRINTF(("requester doesn't have read access\n"));
		goto done2;
	}

#ifdef MAC
	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
	if (error != 0)
		goto done2;
#endif

	/*
	 * Search for a matching message, sleeping and retrying until one is
	 * found or an error ends the call.  On exit from the loop msghdr
	 * has been unlinked from the queue's list.
	 */
	msghdr = NULL;
	while (msghdr == NULL) {
		if (msgtyp == 0) {
			/* Any type will do: take the head of the queue. */
			msghdr = msqkptr->u.msg_first;
			if (msghdr != NULL) {
				if (msgsz < msghdr->msg_ts &&
				    (msgflg & MSG_NOERROR) == 0) {
					DPRINTF(("first message on the queue "
					    "is too big (want %zu, got %d)\n",
					    msgsz, msghdr->msg_ts));
					error = E2BIG;
					goto done2;
				}
#ifdef MAC
				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
				    msghdr);
				if (error != 0)
					goto done2;
#endif
				/* Unlink the head; fix up first/last. */
				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
					msqkptr->u.msg_first = NULL;
					msqkptr->u.msg_last = NULL;
				} else {
					msqkptr->u.msg_first = msghdr->msg_next;
					if (msqkptr->u.msg_first == NULL)
						panic("msg_first/last screwed up #1");
				}
			}
		} else {
			struct msg *previous;
			struct msg **prev;

			/*
			 * Walk the list.  prev points at the link that
			 * references the current message and previous at the
			 * message before it (NULL while at the head).
			 */
			previous = NULL;
			prev = &(msqkptr->u.msg_first);
			while ((msghdr = *prev) != NULL) {
				/*
				 * Is this message's type an exact match or is
				 * this message's type less than or equal to
				 * the absolute value of a negative msgtyp?
				 * Note that the second half of this test can
				 * NEVER be true if msgtyp is positive since
				 * msg_type is always positive!
				 */
				/*
				 * NOTE(review): -msgtyp is not representable
				 * when msgtyp is LONG_MIN; confirm how that
				 * input is meant to behave.
				 */

				if (msgtyp == msghdr->msg_type ||
				    msghdr->msg_type <= -msgtyp) {
					DPRINTF(("found message type %ld, "
					    "requested %ld\n",
					    msghdr->msg_type, msgtyp));
					if (msgsz < msghdr->msg_ts &&
					    (msgflg & MSG_NOERROR) == 0) {
						DPRINTF(("requested message "
						    "on the queue is too big "
						    "(want %zu, got %hu)\n",
						    msgsz, msghdr->msg_ts));
						error = E2BIG;
						goto done2;
					}
#ifdef MAC
					error = mac_sysvmsq_check_msgrcv(
					    td->td_ucred, msghdr);
					if (error != 0)
						goto done2;
#endif
					/* Unlink; repair msg_last if needed. */
					*prev = msghdr->msg_next;
					if (msghdr == msqkptr->u.msg_last) {
						if (previous == NULL) {
							if (prev !=
							    &msqkptr->u.msg_first)
								panic("msg_first/last screwed up #2");
							msqkptr->u.msg_first =
							    NULL;
							msqkptr->u.msg_last =
							    NULL;
						} else {
							if (prev ==
							    &msqkptr->u.msg_first)
								panic("msg_first/last screwed up #3");
							msqkptr->u.msg_last =
							    previous;
						}
					}
					break;
				}
				previous = msghdr;
				prev = &(msghdr->msg_next);
			}
		}

		/*
		 * We've either extracted the msghdr for the appropriate
		 * message or there isn't one.
		 * If there is one then bail out of this loop.
		 */

		if (msghdr != NULL)
			break;

		/*
		 * Hmph!  No message found.  Does the user want to wait?
		 */

		if ((msgflg & IPC_NOWAIT) != 0) {
			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
			    msgtyp));
			/* The SVID says to return ENOMSG. */
			error = ENOMSG;
			goto done2;
		}

		/*
		 * Wait for something to happen
		 */

		DPRINTF(("msgrcv:  goodnight\n"));
		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
		    "msgrcv", 0);
		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));

		/* Any msleep() failure is reported to the caller as EINTR. */
		if (error != 0) {
			DPRINTF(("msgrcv:  interrupted system call\n"));
			error = EINTR;
			goto done2;
		}

		/*
		 * Make sure that the msq queue still exists
		 */

		if (msqkptr->u.msg_qbytes == 0 ||
		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
			DPRINTF(("msqid deleted\n"));
			error = EIDRM;
			goto done2;
		}
	}

	/*
	 * Return the message to the user.
	 *
	 * First, do the bookkeeping (before we risk being interrupted).
	 */

	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
	msqkptr->u.msg_qnum--;
	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
	msqkptr->u.msg_rtime = time_second;

	/* Give back the resource accounting charged to the queue's cred. */
	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, 1);
	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msghdr->msg_ts);

	/*
	 * Make msgsz the actual amount that we'll be returning.
	 * Note that this effectively truncates the message if it is too long
	 * (since msgsz is never increased).
	 */

	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
	    msghdr->msg_ts));
	if (msgsz > msghdr->msg_ts)
		msgsz = msghdr->msg_ts;
	*mtype = msghdr->msg_type;

	/*
	 * Return the segments to the user
	 */

	/*
	 * The text is stored as a chain of msginfo.msgssz-byte segments in
	 * msgpool, linked through msgmaps[].next and starting at msg_spot.
	 */
	next = msghdr->msg_spot;
	for (len = 0; len < msgsz; len += msginfo.msgssz) {
		size_t tlen;

		/* Full segment, except possibly for the last one. */
		if (msgsz - len > msginfo.msgssz)
			tlen = msginfo.msgssz;
		else
			tlen = msgsz - len;
		if (next <= -1)
			panic("next too low #3");
		if (next >= msginfo.msgseg)
			panic("next out of range #3");
		/*
		 * msq_mtx is released around copyout() and retaken
		 * afterwards; msghdr was already unlinked from the queue
		 * above.  (Presumably because copyout() may fault -- confirm.)
		 */
		mtx_unlock(&msq_mtx);
		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
		mtx_lock(&msq_mtx);
		if (error != 0) {
			DPRINTF(("error (%d) copying out message segment\n",
			    error));
			/* The dequeued message is freed, not requeued. */
			msg_freehdr(msghdr);
			wakeup(msqkptr);
			goto done2;
		}
		msgp = (char *)msgp + tlen;
		next = msgmaps[next].next;
	}

	/*
	 * Done, return the actual number of bytes copied out.
	 */

	msg_freehdr(msghdr);
	wakeup(msqkptr);
	td->td_retval[0] = msgsz;
done2:
	mtx_unlock(&msq_mtx);
	return (error);
}
1379
1380int
1381sys_msgrcv(struct thread *td, struct msgrcv_args *uap)
1382{
1383	int error;
1384	long mtype;
1385
1386	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1387	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1388
1389	if ((error = kern_msgrcv(td, uap->msqid,
1390	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1391	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1392		return (error);
1393	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1394		DPRINTF(("error %d copying the message type\n", error));
1395	return (error);
1396}
1397
1398static int
1399sysctl_msqids(SYSCTL_HANDLER_ARGS)
1400{
1401	struct msqid_kernel tmsqk;
1402#ifdef COMPAT_FREEBSD32
1403	struct msqid_kernel32 tmsqk32;
1404#endif
1405	struct prison *pr, *rpr;
1406	void *outaddr;
1407	size_t outsize;
1408	int error, i;
1409
1410	pr = req->td->td_ucred->cr_prison;
1411	rpr = msg_find_prison(req->td->td_ucred);
1412	error = 0;
1413	for (i = 0; i < msginfo.msgmni; i++) {
1414		mtx_lock(&msq_mtx);
1415		if (msqids[i].u.msg_qbytes == 0 || rpr == NULL ||
1416		    msq_prison_cansee(rpr, &msqids[i]) != 0)
1417			bzero(&tmsqk, sizeof(tmsqk));
1418		else {
1419			tmsqk = msqids[i];
1420			if (tmsqk.cred->cr_prison != pr)
1421				tmsqk.u.msg_perm.key = IPC_PRIVATE;
1422		}
1423		mtx_unlock(&msq_mtx);
1424#ifdef COMPAT_FREEBSD32
1425		if (SV_CURPROC_FLAG(SV_ILP32)) {
1426			bzero(&tmsqk32, sizeof(tmsqk32));
1427			freebsd32_ipcperm_out(&tmsqk.u.msg_perm,
1428			    &tmsqk32.u.msg_perm);
1429			/* Don't copy u.msg_first or u.msg_last */
1430			CP(tmsqk, tmsqk32, u.msg_cbytes);
1431			CP(tmsqk, tmsqk32, u.msg_qnum);
1432			CP(tmsqk, tmsqk32, u.msg_qbytes);
1433			CP(tmsqk, tmsqk32, u.msg_lspid);
1434			CP(tmsqk, tmsqk32, u.msg_lrpid);
1435			CP(tmsqk, tmsqk32, u.msg_stime);
1436			CP(tmsqk, tmsqk32, u.msg_rtime);
1437			CP(tmsqk, tmsqk32, u.msg_ctime);
1438			/* Don't copy label or cred */
1439			outaddr = &tmsqk32;
1440			outsize = sizeof(tmsqk32);
1441		} else
1442#endif
1443		{
1444			/* Don't leak kernel pointers */
1445			tmsqk.u.msg_first = NULL;
1446			tmsqk.u.msg_last = NULL;
1447			tmsqk.label = NULL;
1448			tmsqk.cred = NULL;
1449			/*
1450			 * XXX: some padding also exists, but we take care to
1451			 * allocate our pool of msqid_kernel structs with
1452			 * zeroed memory so this should be OK.
1453			 */
1454			outaddr = &tmsqk;
1455			outsize = sizeof(tmsqk);
1456		}
1457		error = SYSCTL_OUT(req, outaddr, outsize);
1458		if (error != 0)
1459			break;
1460	}
1461	return (error);
1462}
1463
/*
 * Export the msginfo limits under kern.ipc.  msgmax is declared with
 * CTLFLAG_RD; the other limits are declared with CTLFLAG_RDTUN.
 * kern.ipc.msqids is an opaque read-only node whose contents are
 * produced by sysctl_msqids().
 */
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
    "Maximum message size");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
    "Number of message queue identifiers");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
    "Maximum number of bytes in a queue");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
    "Maximum number of messages in the system");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
    "Size of a message segment");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
    "Number of message segments");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, sysctl_msqids, "",
    "Array of struct msqid_kernel for each potential message queue");
1480
1481static int
1482msg_prison_check(void *obj, void *data)
1483{
1484	struct prison *pr = obj;
1485	struct prison *prpr;
1486	struct vfsoptlist *opts = data;
1487	int error, jsys;
1488
1489	/*
1490	 * sysvmsg is a jailsys integer.
1491	 * It must be "disable" if the parent jail is disabled.
1492	 */
1493	error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1494	if (error != ENOENT) {
1495		if (error != 0)
1496			return (error);
1497		switch (jsys) {
1498		case JAIL_SYS_DISABLE:
1499			break;
1500		case JAIL_SYS_NEW:
1501		case JAIL_SYS_INHERIT:
1502			prison_lock(pr->pr_parent);
1503			prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
1504			prison_unlock(pr->pr_parent);
1505			if (prpr == NULL)
1506				return (EPERM);
1507			break;
1508		default:
1509			return (EINVAL);
1510		}
1511	}
1512
1513	return (0);
1514}
1515
/*
 * Apply a "sysvmsg" setting to the prison obj.
 *
 * The per-prison slot msg_prison_slot records which prison acts as the
 * root of this jail's message queues; an empty slot means the feature is
 * disabled for the jail.  The requested mode is read from the "sysvmsg"
 * option; when that option is absent, "allow.sysvipc" selects
 * JAIL_SYS_INHERIT, "allow.nosysvipc" selects JAIL_SYS_DISABLE, and
 * otherwise nothing is changed.
 *
 * Always returns 0.
 */
static int
msg_prison_set(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *tpr, *orpr, *nrpr, *trpr;
	struct vfsoptlist *opts = data;
	void *rsv;
	int jsys, descend;

	/*
	 * sysvmsg controls which jail is the root of the associated msgs (this
	 * jail or same as the parent), or if the feature is available at all.
	 */
	/*
	 * NOTE(review): only ENOENT is handled here; if vfs_copyopt() can
	 * fail differently, jsys is used without having been set.  Confirm
	 * that msg_prison_check() has always rejected that case first.
	 */
	if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
		jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
		    ? JAIL_SYS_INHERIT
		    : vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
		    ? JAIL_SYS_DISABLE
		    : -1;
	if (jsys == JAIL_SYS_DISABLE) {
		/* Drop this jail's root, remembering what it was (orpr). */
		prison_lock(pr);
		orpr = osd_jail_get(pr, msg_prison_slot);
		if (orpr != NULL)
			osd_jail_del(pr, msg_prison_slot);
		prison_unlock(pr);
		if (orpr != NULL) {
			/* The jail was its own root: remove its queues. */
			if (orpr == pr)
				msg_prison_cleanup(pr);
			/* Disable all child jails as well. */
			FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
				prison_lock(tpr);
				trpr = osd_jail_get(tpr, msg_prison_slot);
				if (trpr != NULL) {
					osd_jail_del(tpr, msg_prison_slot);
					prison_unlock(tpr);
					if (trpr == tpr)
						msg_prison_cleanup(tpr);
				} else {
					/* Already disabled: skip its subtree. */
					prison_unlock(tpr);
					descend = 0;
				}
			}
		}
	} else if (jsys != -1) {
		/* New root: this jail itself, or whatever the parent uses. */
		if (jsys == JAIL_SYS_NEW)
			nrpr = pr;
		else {
			prison_lock(pr->pr_parent);
			nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
			prison_unlock(pr->pr_parent);
		}
		/* Reserve the slot storage before taking the prison lock. */
		rsv = osd_reserve(msg_prison_slot);
		prison_lock(pr);
		orpr = osd_jail_get(pr, msg_prison_slot);
		if (orpr != nrpr)
			(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
			    nrpr);
		else
			osd_free_reserved(rsv);
		prison_unlock(pr);
		if (orpr != nrpr) {
			/* The jail stopped being its own root. */
			if (orpr == pr)
				msg_prison_cleanup(pr);
			if (orpr != NULL) {
				/* Change child jails matching the old root. */
				FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
					prison_lock(tpr);
					trpr = osd_jail_get(tpr,
					    msg_prison_slot);
					if (trpr == orpr) {
						(void)osd_jail_set(tpr,
						    msg_prison_slot, nrpr);
						prison_unlock(tpr);
						if (trpr == tpr)
							msg_prison_cleanup(tpr);
					} else {
						/* Different root: skip subtree. */
						prison_unlock(tpr);
						descend = 0;
					}
				}
			}
		}
	}

	return (0);
}
1602
1603static int
1604msg_prison_get(void *obj, void *data)
1605{
1606	struct prison *pr = obj;
1607	struct prison *rpr;
1608	struct vfsoptlist *opts = data;
1609	int error, jsys;
1610
1611	/* Set sysvmsg based on the jail's root prison. */
1612	prison_lock(pr);
1613	rpr = osd_jail_get(pr, msg_prison_slot);
1614	prison_unlock(pr);
1615	jsys = rpr == NULL ? JAIL_SYS_DISABLE
1616	    : rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
1617	error = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
1618	if (error == ENOENT)
1619		error = 0;
1620	return (error);
1621}
1622
1623static int
1624msg_prison_remove(void *obj, void *data __unused)
1625{
1626	struct prison *pr = obj;
1627	struct prison *rpr;
1628
1629	prison_lock(pr);
1630	rpr = osd_jail_get(pr, msg_prison_slot);
1631	prison_unlock(pr);
1632	if (rpr == pr)
1633		msg_prison_cleanup(pr);
1634	return (0);
1635}
1636
1637static void
1638msg_prison_cleanup(struct prison *pr)
1639{
1640	struct msqid_kernel *msqkptr;
1641	int i;
1642
1643	/* Remove any msqs that belong to this jail. */
1644	mtx_lock(&msq_mtx);
1645	for (i = 0; i < msginfo.msgmni; i++) {
1646		msqkptr = &msqids[i];
1647		if (msqkptr->u.msg_qbytes != 0 &&
1648		    msqkptr->cred != NULL && msqkptr->cred->cr_prison == pr)
1649			msq_remove(msqkptr);
1650	}
1651	mtx_unlock(&msq_mtx);
1652}
1653
/*
 * Declare the read/write "sysvmsg" jail parameter node; this is the option
 * name the msg_prison_check/set/get handlers look up.
 */
SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
1655
1656#ifdef COMPAT_FREEBSD32
1657int
1658freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
1659{
1660
1661#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1662    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1663	switch (uap->which) {
1664	case 0:
1665		return (freebsd7_freebsd32_msgctl(td,
1666		    (struct freebsd7_freebsd32_msgctl_args *)&uap->a2));
1667	case 2:
1668		return (freebsd32_msgsnd(td,
1669		    (struct freebsd32_msgsnd_args *)&uap->a2));
1670	case 3:
1671		return (freebsd32_msgrcv(td,
1672		    (struct freebsd32_msgrcv_args *)&uap->a2));
1673	default:
1674		return (sys_msgsys(td, (struct msgsys_args *)uap));
1675	}
1676#else
1677	return (nosys(td, NULL));
1678#endif
1679}
1680
1681#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1682    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1683int
1684freebsd7_freebsd32_msgctl(struct thread *td,
1685    struct freebsd7_freebsd32_msgctl_args *uap)
1686{
1687	struct msqid_ds msqbuf;
1688	struct msqid_ds32_old msqbuf32;
1689	int error;
1690
1691	if (uap->cmd == IPC_SET) {
1692		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1693		if (error)
1694			return (error);
1695		freebsd32_ipcperm_old_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1696		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1697		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1698		CP(msqbuf32, msqbuf, msg_cbytes);
1699		CP(msqbuf32, msqbuf, msg_qnum);
1700		CP(msqbuf32, msqbuf, msg_qbytes);
1701		CP(msqbuf32, msqbuf, msg_lspid);
1702		CP(msqbuf32, msqbuf, msg_lrpid);
1703		CP(msqbuf32, msqbuf, msg_stime);
1704		CP(msqbuf32, msqbuf, msg_rtime);
1705		CP(msqbuf32, msqbuf, msg_ctime);
1706	}
1707	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1708	if (error)
1709		return (error);
1710	if (uap->cmd == IPC_STAT) {
1711		bzero(&msqbuf32, sizeof(msqbuf32));
1712		freebsd32_ipcperm_old_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1713		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1714		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1715		CP(msqbuf, msqbuf32, msg_cbytes);
1716		CP(msqbuf, msqbuf32, msg_qnum);
1717		CP(msqbuf, msqbuf32, msg_qbytes);
1718		CP(msqbuf, msqbuf32, msg_lspid);
1719		CP(msqbuf, msqbuf32, msg_lrpid);
1720		CP(msqbuf, msqbuf32, msg_stime);
1721		CP(msqbuf, msqbuf32, msg_rtime);
1722		CP(msqbuf, msqbuf32, msg_ctime);
1723		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1724	}
1725	return (error);
1726}
1727#endif
1728
1729int
1730freebsd32_msgctl(struct thread *td, struct freebsd32_msgctl_args *uap)
1731{
1732	struct msqid_ds msqbuf;
1733	struct msqid_ds32 msqbuf32;
1734	int error;
1735
1736	if (uap->cmd == IPC_SET) {
1737		error = copyin(uap->buf, &msqbuf32, sizeof(msqbuf32));
1738		if (error)
1739			return (error);
1740		freebsd32_ipcperm_in(&msqbuf32.msg_perm, &msqbuf.msg_perm);
1741		PTRIN_CP(msqbuf32, msqbuf, msg_first);
1742		PTRIN_CP(msqbuf32, msqbuf, msg_last);
1743		CP(msqbuf32, msqbuf, msg_cbytes);
1744		CP(msqbuf32, msqbuf, msg_qnum);
1745		CP(msqbuf32, msqbuf, msg_qbytes);
1746		CP(msqbuf32, msqbuf, msg_lspid);
1747		CP(msqbuf32, msqbuf, msg_lrpid);
1748		CP(msqbuf32, msqbuf, msg_stime);
1749		CP(msqbuf32, msqbuf, msg_rtime);
1750		CP(msqbuf32, msqbuf, msg_ctime);
1751	}
1752	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1753	if (error)
1754		return (error);
1755	if (uap->cmd == IPC_STAT) {
1756		freebsd32_ipcperm_out(&msqbuf.msg_perm, &msqbuf32.msg_perm);
1757		PTROUT_CP(msqbuf, msqbuf32, msg_first);
1758		PTROUT_CP(msqbuf, msqbuf32, msg_last);
1759		CP(msqbuf, msqbuf32, msg_cbytes);
1760		CP(msqbuf, msqbuf32, msg_qnum);
1761		CP(msqbuf, msqbuf32, msg_qbytes);
1762		CP(msqbuf, msqbuf32, msg_lspid);
1763		CP(msqbuf, msqbuf32, msg_lrpid);
1764		CP(msqbuf, msqbuf32, msg_stime);
1765		CP(msqbuf, msqbuf32, msg_rtime);
1766		CP(msqbuf, msqbuf32, msg_ctime);
1767		error = copyout(&msqbuf32, uap->buf, sizeof(struct msqid_ds32));
1768	}
1769	return (error);
1770}
1771
1772int
1773freebsd32_msgsnd(struct thread *td, struct freebsd32_msgsnd_args *uap)
1774{
1775	const void *msgp;
1776	long mtype;
1777	int32_t mtype32;
1778	int error;
1779
1780	msgp = PTRIN(uap->msgp);
1781	if ((error = copyin(msgp, &mtype32, sizeof(mtype32))) != 0)
1782		return (error);
1783	mtype = mtype32;
1784	return (kern_msgsnd(td, uap->msqid,
1785	    (const char *)msgp + sizeof(mtype32),
1786	    uap->msgsz, uap->msgflg, mtype));
1787}
1788
1789int
1790freebsd32_msgrcv(struct thread *td, struct freebsd32_msgrcv_args *uap)
1791{
1792	void *msgp;
1793	long mtype;
1794	int32_t mtype32;
1795	int error;
1796
1797	msgp = PTRIN(uap->msgp);
1798	if ((error = kern_msgrcv(td, uap->msqid,
1799	    (char *)msgp + sizeof(mtype32), uap->msgsz,
1800	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1801		return (error);
1802	mtype32 = (int32_t)mtype;
1803	return (copyout(&mtype32, msgp, sizeof(mtype32)));
1804}
1805#endif
1806
1807#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1808    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1809
/*
 * Dispatch table used by sys_msgsys(), indexed by the "which" selector:
 * 0 = msgctl (old layout), 1 = msgget, 2 = msgsnd, 3 = msgrcv.
 *
 * XXX casting to (sy_call_t *) is bogus, as usual.
 */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)freebsd7_msgctl, (sy_call_t *)sys_msgget,
	(sy_call_t *)sys_msgsnd, (sy_call_t *)sys_msgrcv
};
1815
1816/*
1817 * Entry point for all MSG calls.
1818 *
1819 * XXX actually varargs.
1820 * struct msgsys_args {
1821 *		int	which;
1822 *		int	a2;
1823 *		int	a3;
1824 *		int	a4;
1825 *		int	a5;
1826 *		int	a6;
1827 *	} *uap;
1828 */
1829int
1830sys_msgsys(struct thread *td, struct msgsys_args *uap)
1831{
1832	int error;
1833
1834	if (uap->which < 0 || uap->which >= nitems(msgcalls))
1835		return (EINVAL);
1836	error = (*msgcalls[uap->which])(td, &uap->a2);
1837	return (error);
1838}
1839
/*
 * Copy field fld from structure src to structure dst.  Defined here only
 * when no definition of CP is already in effect.
 */
#ifndef CP
#define CP(src, dst, fld)	do { (dst).fld = (src).fld; } while (0)
#endif
1843
#ifndef _SYS_SYSPROTO_H_
/* Arguments of the old-layout msgctl call handled by freebsd7_msgctl(). */
struct freebsd7_msgctl_args {
	int	msqid;			/* message queue identifier */
	int	cmd;			/* command passed to kern_msgctl() */
	struct	msqid_ds_old *buf;	/* user buffer for IPC_SET/IPC_STAT */
};
#endif
1851int
1852freebsd7_msgctl(struct thread *td, struct freebsd7_msgctl_args *uap)
1853{
1854	struct msqid_ds_old msqold;
1855	struct msqid_ds msqbuf;
1856	int error;
1857
1858	DPRINTF(("call to freebsd7_msgctl(%d, %d, %p)\n", uap->msqid, uap->cmd,
1859	    uap->buf));
1860	if (uap->cmd == IPC_SET) {
1861		error = copyin(uap->buf, &msqold, sizeof(msqold));
1862		if (error)
1863			return (error);
1864		ipcperm_old2new(&msqold.msg_perm, &msqbuf.msg_perm);
1865		CP(msqold, msqbuf, msg_first);
1866		CP(msqold, msqbuf, msg_last);
1867		CP(msqold, msqbuf, msg_cbytes);
1868		CP(msqold, msqbuf, msg_qnum);
1869		CP(msqold, msqbuf, msg_qbytes);
1870		CP(msqold, msqbuf, msg_lspid);
1871		CP(msqold, msqbuf, msg_lrpid);
1872		CP(msqold, msqbuf, msg_stime);
1873		CP(msqold, msqbuf, msg_rtime);
1874		CP(msqold, msqbuf, msg_ctime);
1875	}
1876	error = kern_msgctl(td, uap->msqid, uap->cmd, &msqbuf);
1877	if (error)
1878		return (error);
1879	if (uap->cmd == IPC_STAT) {
1880		bzero(&msqold, sizeof(msqold));
1881		ipcperm_new2old(&msqbuf.msg_perm, &msqold.msg_perm);
1882		CP(msqbuf, msqold, msg_first);
1883		CP(msqbuf, msqold, msg_last);
1884		CP(msqbuf, msqold, msg_cbytes);
1885		CP(msqbuf, msqold, msg_qnum);
1886		CP(msqbuf, msqold, msg_qbytes);
1887		CP(msqbuf, msqold, msg_lspid);
1888		CP(msqbuf, msqold, msg_lrpid);
1889		CP(msqbuf, msqold, msg_stime);
1890		CP(msqbuf, msqold, msg_rtime);
1891		CP(msqbuf, msqold, msg_ctime);
1892		error = copyout(&msqold, uap->buf, sizeof(struct msqid_ds_old));
1893	}
1894	return (error);
1895}
1896
1897#undef CP
1898
1899#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1900	   COMPAT_FREEBSD7 */
1901