sysv_msg.c revision 140839
1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 140839 2005-01-26 00:46:36Z sobomax $");
52
53#include "opt_sysvipc.h"
54#include "opt_mac.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/proc.h>
61#include <sys/lock.h>
62#include <sys/mac.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/msg.h>
66#include <sys/syscall.h>
67#include <sys/syscallsubr.h>
68#include <sys/sysent.h>
69#include <sys/sysctl.h>
70#include <sys/malloc.h>
71#include <sys/jail.h>
72
73static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
74
75static void msginit(void);
76static int msgunload(void);
77static int sysvmsg_modload(struct module *, int, void *);
78
79#ifdef MSG_DEBUG
80#define DPRINTF(a)	printf a
81#else
82#define DPRINTF(a)
83#endif
84#ifdef MAC_DEBUG
85#define MPRINTF(a)	printf a
86#else
87#define MPRINTF(a)
88#endif
89
90static void msg_freehdr(struct msg *msghdr);
91
92/* XXX casting to (sy_call_t *) is bogus, as usual. */
93static sy_call_t *msgcalls[] = {
94	(sy_call_t *)msgctl, (sy_call_t *)msgget,
95	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
96};
97
98#ifndef MSGSSZ
99#define MSGSSZ	8		/* Each segment must be 2^N long */
100#endif
101#ifndef MSGSEG
102#define MSGSEG	2048		/* must be less than 32767 */
103#endif
104#define MSGMAX	(MSGSSZ*MSGSEG)
105#ifndef MSGMNB
106#define MSGMNB	2048		/* max # of bytes in a queue */
107#endif
108#ifndef MSGMNI
109#define MSGMNI	40
110#endif
111#ifndef MSGTQL
112#define MSGTQL	40
113#endif
114
115/*
116 * Based on the configuration parameters described in an SVR2 (yes, two)
117 * config(1m) man page.
118 *
119 * Each message is broken up and stored in segments that are msgssz bytes
120 * long.  For efficiency reasons, this should be a power of two.  Also,
121 * it doesn't make sense if it is less than 8 or greater than about 256.
122 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
123 * two between 8 and 1024 inclusive (and panic's if it isn't).
124 */
125struct msginfo msginfo = {
126                MSGMAX,         /* max chars in a message */
127                MSGMNI,         /* # of message queue identifiers */
128                MSGMNB,         /* max chars in a queue */
129                MSGTQL,         /* max messages in system */
130                MSGSSZ,         /* size of a message segment */
131                		/* (must be small power of 2 greater than 4) */
132                MSGSEG          /* number of message segments */
133};
134
135/*
136 * macros to convert between msqid_ds's and msqid's.
137 * (specific to this implementation)
138 */
/*
 * MSQID builds an identifier from a slot index (low 16 bits) and the
 * sequence number stored in (ds).msg_perm.seq (high 16 bits).
 * MSQID_IX and MSQID_SEQ extract those two halves again.
 * The index term is explicitly parenthesized so the expression does not
 * rely on the relative precedence of `&' and `|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
142
143/*
144 * The rest of this file is specific to this particular implementation.
145 */
146
147struct msgmap {
148	short	next;		/* next segment in buffer */
149    				/* -1 -> available */
150    				/* 0..(MSGSEG-1) -> index of next segment */
151};
152
153#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
154
155static int nfree_msgmaps;	/* # of free map entries */
156static short free_msgmaps;	/* head of linked list of free map entries */
157static struct msg *free_msghdrs;/* list of free msg headers */
158static char *msgpool;		/* MSGMAX byte long msg buffer pool */
159static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
160static struct msg *msghdrs;	/* MSGTQL msg headers */
161static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
162static struct mtx msq_mtx;	/* global mutex for message queues. */
163
164static void
165msginit()
166{
167	register int i;
168
169	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
170	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
171	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
172	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
173	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
174	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
175
176	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
177	if (msgpool == NULL)
178		panic("msgpool is NULL");
179	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
180	if (msgmaps == NULL)
181		panic("msgmaps is NULL");
182	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
183	if (msghdrs == NULL)
184		panic("msghdrs is NULL");
185	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
186	    M_WAITOK);
187	if (msqids == NULL)
188		panic("msqids is NULL");
189
190	/*
191	 * msginfo.msgssz should be a power of two for efficiency reasons.
192	 * It is also pretty silly if msginfo.msgssz is less than 8
193	 * or greater than about 256 so ...
194	 */
195
196	i = 8;
197	while (i < 1024 && i != msginfo.msgssz)
198		i <<= 1;
199    	if (i != msginfo.msgssz) {
200		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
201		    msginfo.msgssz));
202		panic("msginfo.msgssz not a small power of 2");
203	}
204
205	if (msginfo.msgseg > 32767) {
206		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
207		panic("msginfo.msgseg > 32767");
208	}
209
210	if (msgmaps == NULL)
211		panic("msgmaps is NULL");
212
213	for (i = 0; i < msginfo.msgseg; i++) {
214		if (i > 0)
215			msgmaps[i-1].next = i;
216		msgmaps[i].next = -1;	/* implies entry is available */
217	}
218	free_msgmaps = 0;
219	nfree_msgmaps = msginfo.msgseg;
220
221	if (msghdrs == NULL)
222		panic("msghdrs is NULL");
223
224	for (i = 0; i < msginfo.msgtql; i++) {
225		msghdrs[i].msg_type = 0;
226		if (i > 0)
227			msghdrs[i-1].msg_next = &msghdrs[i];
228		msghdrs[i].msg_next = NULL;
229#ifdef MAC
230		mac_init_sysv_msgmsg(&msghdrs[i]);
231#endif
232    	}
233	free_msghdrs = &msghdrs[0];
234
235	if (msqids == NULL)
236		panic("msqids is NULL");
237
238	for (i = 0; i < msginfo.msgmni; i++) {
239		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
240		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
241		msqids[i].u.msg_perm.mode = 0;
242#ifdef MAC
243		mac_init_sysv_msgqueue(&msqids[i]);
244#endif
245	}
246	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
247}
248
249static int
250msgunload()
251{
252	struct msqid_kernel *msqkptr;
253	int msqid;
254#ifdef MAC
255	int i;
256#endif
257
258	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
259		/*
260		 * Look for an unallocated and unlocked msqid_ds.
261		 * msqid_ds's can be locked by msgsnd or msgrcv while
262		 * they are copying the message in/out.  We can't
263		 * re-use the entry until they release it.
264		 */
265		msqkptr = &msqids[msqid];
266		if (msqkptr->u.msg_qbytes != 0 ||
267		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
268			break;
269	}
270	if (msqid != msginfo.msgmni)
271		return (EBUSY);
272
273#ifdef MAC
274	for (i = 0; i < msginfo.msgtql; i++)
275		mac_destroy_sysv_msgmsg(&msghdrs[i]);
276	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
277		mac_destroy_sysv_msgqueue(&msqids[msqid]);
278#endif
279	free(msgpool, M_MSG);
280	free(msgmaps, M_MSG);
281	free(msghdrs, M_MSG);
282	free(msqids, M_MSG);
283	mtx_destroy(&msq_mtx);
284	return (0);
285}
286
287
288static int
289sysvmsg_modload(struct module *module, int cmd, void *arg)
290{
291	int error = 0;
292
293	switch (cmd) {
294	case MOD_LOAD:
295		msginit();
296		break;
297	case MOD_UNLOAD:
298		error = msgunload();
299		break;
300	case MOD_SHUTDOWN:
301		break;
302	default:
303		error = EINVAL;
304		break;
305	}
306	return (error);
307}
308
309static moduledata_t sysvmsg_mod = {
310	"sysvmsg",
311	&sysvmsg_modload,
312	NULL
313};
314
315SYSCALL_MODULE_HELPER(msgsys);
316SYSCALL_MODULE_HELPER(msgctl);
317SYSCALL_MODULE_HELPER(msgget);
318SYSCALL_MODULE_HELPER(msgsnd);
319SYSCALL_MODULE_HELPER(msgrcv);
320
321DECLARE_MODULE(sysvmsg, sysvmsg_mod,
322	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
323MODULE_VERSION(sysvmsg, 1);
324
325/*
326 * Entry point for all MSG calls
327 *
328 * MPSAFE
329 */
330int
331msgsys(td, uap)
332	struct thread *td;
333	/* XXX actually varargs. */
334	struct msgsys_args /* {
335		int	which;
336		int	a2;
337		int	a3;
338		int	a4;
339		int	a5;
340		int	a6;
341	} */ *uap;
342{
343	int error;
344
345	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
346		return (ENOSYS);
347	if (uap->which < 0 ||
348	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
349		return (EINVAL);
350	error = (*msgcalls[uap->which])(td, &uap->a2);
351	return (error);
352}
353
354static void
355msg_freehdr(msghdr)
356	struct msg *msghdr;
357{
358	while (msghdr->msg_ts > 0) {
359		short next;
360		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
361			panic("msghdr->msg_spot out of range");
362		next = msgmaps[msghdr->msg_spot].next;
363		msgmaps[msghdr->msg_spot].next = free_msgmaps;
364		free_msgmaps = msghdr->msg_spot;
365		nfree_msgmaps++;
366		msghdr->msg_spot = next;
367		if (msghdr->msg_ts >= msginfo.msgssz)
368			msghdr->msg_ts -= msginfo.msgssz;
369		else
370			msghdr->msg_ts = 0;
371	}
372	if (msghdr->msg_spot != -1)
373		panic("msghdr->msg_spot != -1");
374	msghdr->msg_next = free_msghdrs;
375	free_msghdrs = msghdr;
376#ifdef MAC
377	mac_cleanup_sysv_msgmsg(msghdr);
378#endif
379}
380
381#ifndef _SYS_SYSPROTO_H_
382struct msgctl_args {
383	int	msqid;
384	int	cmd;
385	struct	msqid_ds *buf;
386};
387#endif
388
389/*
390 * MPSAFE
391 */
392int
393msgctl(td, uap)
394	struct thread *td;
395	register struct msgctl_args *uap;
396{
397	int msqid = uap->msqid;
398	int cmd = uap->cmd;
399	struct msqid_ds msqbuf;
400	struct msqid_ds *msqptr;
401	int error;
402
403	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, uap->buf));
404	if (cmd == IPC_SET &&
405	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
406		return (error);
407	error = kern_msgctl(td, msqid, cmd, &msqbuf, &msqptr);
408	if (cmd == IPC_STAT && error == 0)
409		error = copyout(msqptr, uap->buf, sizeof(struct msqid_ds));
410	return (error);
411}
412
413int
414kern_msgctl(td, msqid, cmd, msqbuf, msqptr)
415	struct thread *td;
416	int msqid;
417	int cmd;
418	struct msqid_ds *msqbuf;
419	struct msqid_ds **msqptr;
420{
421	int rval, error, msqix;
422	register struct msqid_kernel *msqkptr;
423
424	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
425		return (ENOSYS);
426
427	msqix = IPCID_TO_IX(msqid);
428
429	if (msqix < 0 || msqix >= msginfo.msgmni) {
430		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
431		    msginfo.msgmni));
432		return (EINVAL);
433	}
434
435	msqkptr = &msqids[msqix];
436
437	mtx_lock(&msq_mtx);
438	if (msqkptr->u.msg_qbytes == 0) {
439		DPRINTF(("no such msqid\n"));
440		error = EINVAL;
441		goto done2;
442	}
443	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
444		DPRINTF(("wrong sequence number\n"));
445		error = EINVAL;
446		goto done2;
447	}
448#ifdef MAC
449	error = mac_check_sysv_msqctl(td->td_ucred, msqkptr, cmd);
450	if (error != 0) {
451		MPRINTF(("mac_check_sysv_msqctl returned %d\n", error));
452		goto done2;
453	}
454#endif
455
456	error = 0;
457	rval = 0;
458
459	switch (cmd) {
460
461	case IPC_RMID:
462	{
463		struct msg *msghdr;
464		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
465			goto done2;
466
467#ifdef MAC
468		/*
469		 * Check that the thread has MAC access permissions to
470		 * individual msghdrs.  Note: We need to do this in a
471		 * separate loop because the actual loop alters the
472		 * msq/msghdr info as it progresses, and there is no going
473		 * back if half the way through we discover that the
474		 * thread cannot free a certain msghdr.  The msq will get
475		 * into an inconsistent state.
476		 */
477		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
478		    msghdr = msghdr->msg_next) {
479			error = mac_check_sysv_msgrmid(td->td_ucred, msghdr);
480			if (error != 0) {
481				MPRINTF(("mac_check_sysv_msgrmid returned %d\n",
482				    error));
483				goto done2;
484			}
485		}
486#endif
487
488		/* Free the message headers */
489		msghdr = msqkptr->u.msg_first;
490		while (msghdr != NULL) {
491			struct msg *msghdr_tmp;
492
493			/* Free the segments of each message */
494			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
495			msqkptr->u.msg_qnum--;
496			msghdr_tmp = msghdr;
497			msghdr = msghdr->msg_next;
498			msg_freehdr(msghdr_tmp);
499		}
500
501		if (msqkptr->u.msg_cbytes != 0)
502			panic("msg_cbytes is screwed up");
503		if (msqkptr->u.msg_qnum != 0)
504			panic("msg_qnum is screwed up");
505
506		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
507
508#ifdef MAC
509		mac_cleanup_sysv_msgqueue(msqkptr);
510#endif
511
512		wakeup(msqkptr);
513	}
514
515		break;
516
517	case IPC_SET:
518		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
519			goto done2;
520		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
521			error = suser(td);
522			if (error)
523				goto done2;
524		}
525		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
526			DPRINTF(("can't increase msg_qbytes beyond %d"
527			    "(truncating)\n", msginfo.msgmnb));
528			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
529		}
530		if (msqbuf->msg_qbytes == 0) {
531			DPRINTF(("can't reduce msg_qbytes to 0\n"));
532			error = EINVAL;		/* non-standard errno! */
533			goto done2;
534		}
535		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
536		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
537		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
538		    (msqbuf->msg_perm.mode & 0777);
539		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
540		msqkptr->u.msg_ctime = time_second;
541		break;
542
543	case IPC_STAT:
544		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
545			DPRINTF(("requester doesn't have read access\n"));
546			goto done2;
547		}
548		*msqptr = &(msqkptr->u);
549		break;
550
551	default:
552		DPRINTF(("invalid command %d\n", cmd));
553		error = EINVAL;
554		goto done2;
555	}
556
557	if (error == 0)
558		td->td_retval[0] = rval;
559done2:
560	mtx_unlock(&msq_mtx);
561	return(error);
562}
563
564#ifndef _SYS_SYSPROTO_H_
565struct msgget_args {
566	key_t	key;
567	int	msgflg;
568};
569#endif
570
571/*
572 * MPSAFE
573 */
574int
575msgget(td, uap)
576	struct thread *td;
577	register struct msgget_args *uap;
578{
579	int msqid, error = 0;
580	int key = uap->key;
581	int msgflg = uap->msgflg;
582	struct ucred *cred = td->td_ucred;
583	register struct msqid_kernel *msqkptr = NULL;
584
585	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
586
587	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
588		return (ENOSYS);
589
590	mtx_lock(&msq_mtx);
591	if (key != IPC_PRIVATE) {
592		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
593			msqkptr = &msqids[msqid];
594			if (msqkptr->u.msg_qbytes != 0 &&
595			    msqkptr->u.msg_perm.key == key)
596				break;
597		}
598		if (msqid < msginfo.msgmni) {
599			DPRINTF(("found public key\n"));
600			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
601				DPRINTF(("not exclusive\n"));
602				error = EEXIST;
603				goto done2;
604			}
605			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
606			    msgflg & 0700))) {
607				DPRINTF(("requester doesn't have 0%o access\n",
608				    msgflg & 0700));
609				goto done2;
610			}
611#ifdef MAC
612			error = mac_check_sysv_msqget(cred, msqkptr);
613			if (error != 0) {
614				MPRINTF(("mac_check_sysv_msqget returned %d\n",
615				    error));
616				goto done2;
617			}
618#endif
619			goto found;
620		}
621	}
622
623	DPRINTF(("need to allocate the msqid_ds\n"));
624	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
625		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
626			/*
627			 * Look for an unallocated and unlocked msqid_ds.
628			 * msqid_ds's can be locked by msgsnd or msgrcv while
629			 * they are copying the message in/out.  We can't
630			 * re-use the entry until they release it.
631			 */
632			msqkptr = &msqids[msqid];
633			if (msqkptr->u.msg_qbytes == 0 &&
634			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
635				break;
636		}
637		if (msqid == msginfo.msgmni) {
638			DPRINTF(("no more msqid_ds's available\n"));
639			error = ENOSPC;
640			goto done2;
641		}
642		DPRINTF(("msqid %d is available\n", msqid));
643		msqkptr->u.msg_perm.key = key;
644		msqkptr->u.msg_perm.cuid = cred->cr_uid;
645		msqkptr->u.msg_perm.uid = cred->cr_uid;
646		msqkptr->u.msg_perm.cgid = cred->cr_gid;
647		msqkptr->u.msg_perm.gid = cred->cr_gid;
648		msqkptr->u.msg_perm.mode = (msgflg & 0777);
649		/* Make sure that the returned msqid is unique */
650		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
651		msqkptr->u.msg_first = NULL;
652		msqkptr->u.msg_last = NULL;
653		msqkptr->u.msg_cbytes = 0;
654		msqkptr->u.msg_qnum = 0;
655		msqkptr->u.msg_qbytes = msginfo.msgmnb;
656		msqkptr->u.msg_lspid = 0;
657		msqkptr->u.msg_lrpid = 0;
658		msqkptr->u.msg_stime = 0;
659		msqkptr->u.msg_rtime = 0;
660		msqkptr->u.msg_ctime = time_second;
661#ifdef MAC
662		mac_create_sysv_msgqueue(cred, msqkptr);
663#endif
664	} else {
665		DPRINTF(("didn't find it and wasn't asked to create it\n"));
666		error = ENOENT;
667		goto done2;
668	}
669
670found:
671	/* Construct the unique msqid */
672	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
673done2:
674	mtx_unlock(&msq_mtx);
675	return (error);
676}
677
678#ifndef _SYS_SYSPROTO_H_
679struct msgsnd_args {
680	int	msqid;
681	const void	*msgp;
682	size_t	msgsz;
683	int	msgflg;
684};
685#endif
686
687/*
688 * MPSAFE
689 */
690int
691msgsnd(td, uap)
692	struct thread *td;
693	register struct msgsnd_args *uap;
694{
695	int msqid = uap->msqid;
696	const void *user_msgp = uap->msgp;
697	size_t msgsz = uap->msgsz;
698	int msgflg = uap->msgflg;
699	int segs_needed, error = 0;
700	register struct msqid_kernel *msqkptr;
701	register struct msg *msghdr;
702	short next;
703
704	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
705	    msgflg));
706	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
707		return (ENOSYS);
708
709	mtx_lock(&msq_mtx);
710	msqid = IPCID_TO_IX(msqid);
711
712	if (msqid < 0 || msqid >= msginfo.msgmni) {
713		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
714		    msginfo.msgmni));
715		error = EINVAL;
716		goto done2;
717	}
718
719	msqkptr = &msqids[msqid];
720	if (msqkptr->u.msg_qbytes == 0) {
721		DPRINTF(("no such message queue id\n"));
722		error = EINVAL;
723		goto done2;
724	}
725	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
726		DPRINTF(("wrong sequence number\n"));
727		error = EINVAL;
728		goto done2;
729	}
730
731	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
732		DPRINTF(("requester doesn't have write access\n"));
733		goto done2;
734	}
735
736#ifdef MAC
737	error = mac_check_sysv_msqsnd(td->td_ucred, msqkptr);
738	if (error != 0) {
739		MPRINTF(("mac_check_sysv_msqsnd returned %d\n", error));
740		goto done2;
741	}
742#endif
743
744	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
745	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
746	    segs_needed));
747	for (;;) {
748		int need_more_resources = 0;
749
750		/*
751		 * check msgsz
752		 * (inside this loop in case msg_qbytes changes while we sleep)
753		 */
754
755		if (msgsz > msqkptr->u.msg_qbytes) {
756			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
757			error = EINVAL;
758			goto done2;
759		}
760
761		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
762			DPRINTF(("msqid is locked\n"));
763			need_more_resources = 1;
764		}
765		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
766			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
767			need_more_resources = 1;
768		}
769		if (segs_needed > nfree_msgmaps) {
770			DPRINTF(("segs_needed > nfree_msgmaps\n"));
771			need_more_resources = 1;
772		}
773		if (free_msghdrs == NULL) {
774			DPRINTF(("no more msghdrs\n"));
775			need_more_resources = 1;
776		}
777
778		if (need_more_resources) {
779			int we_own_it;
780
781			if ((msgflg & IPC_NOWAIT) != 0) {
782				DPRINTF(("need more resources but caller "
783				    "doesn't want to wait\n"));
784				error = EAGAIN;
785				goto done2;
786			}
787
788			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
789				DPRINTF(("we don't own the msqid_ds\n"));
790				we_own_it = 0;
791			} else {
792				/* Force later arrivals to wait for our
793				   request */
794				DPRINTF(("we own the msqid_ds\n"));
795				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
796				we_own_it = 1;
797			}
798			DPRINTF(("goodnight\n"));
799			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
800			    "msgwait", 0);
801			DPRINTF(("good morning, error=%d\n", error));
802			if (we_own_it)
803				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
804			if (error != 0) {
805				DPRINTF(("msgsnd:  interrupted system call\n"));
806				error = EINTR;
807				goto done2;
808			}
809
810			/*
811			 * Make sure that the msq queue still exists
812			 */
813
814			if (msqkptr->u.msg_qbytes == 0) {
815				DPRINTF(("msqid deleted\n"));
816				error = EIDRM;
817				goto done2;
818			}
819
820		} else {
821			DPRINTF(("got all the resources that we need\n"));
822			break;
823		}
824	}
825
826	/*
827	 * We have the resources that we need.
828	 * Make sure!
829	 */
830
831	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
832		panic("msg_perm.mode & MSG_LOCKED");
833	if (segs_needed > nfree_msgmaps)
834		panic("segs_needed > nfree_msgmaps");
835	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
836		panic("msgsz + msg_cbytes > msg_qbytes");
837	if (free_msghdrs == NULL)
838		panic("no more msghdrs");
839
840	/*
841	 * Re-lock the msqid_ds in case we page-fault when copying in the
842	 * message
843	 */
844
845	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
846		panic("msqid_ds is already locked");
847	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
848
849	/*
850	 * Allocate a message header
851	 */
852
853	msghdr = free_msghdrs;
854	free_msghdrs = msghdr->msg_next;
855	msghdr->msg_spot = -1;
856	msghdr->msg_ts = msgsz;
857#ifdef MAC
858	/*
859	 * XXXMAC: Should the mac_check_sysv_msgmsq check follow here
860	 * immediately?  Or, should it be checked just before the msg is
861	 * enqueued in the msgq (as it is done now)?
862	 */
863	mac_create_sysv_msgmsg(td->td_ucred, msqkptr, msghdr);
864#endif
865
866	/*
867	 * Allocate space for the message
868	 */
869
870	while (segs_needed > 0) {
871		if (nfree_msgmaps <= 0)
872			panic("not enough msgmaps");
873		if (free_msgmaps == -1)
874			panic("nil free_msgmaps");
875		next = free_msgmaps;
876		if (next <= -1)
877			panic("next too low #1");
878		if (next >= msginfo.msgseg)
879			panic("next out of range #1");
880		DPRINTF(("allocating segment %d to message\n", next));
881		free_msgmaps = msgmaps[next].next;
882		nfree_msgmaps--;
883		msgmaps[next].next = msghdr->msg_spot;
884		msghdr->msg_spot = next;
885		segs_needed--;
886	}
887
888	/*
889	 * Copy in the message type
890	 */
891
892	mtx_unlock(&msq_mtx);
893	if ((error = copyin(user_msgp, &msghdr->msg_type,
894	    sizeof(msghdr->msg_type))) != 0) {
895		mtx_lock(&msq_mtx);
896		DPRINTF(("error %d copying the message type\n", error));
897		msg_freehdr(msghdr);
898		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
899		wakeup(msqkptr);
900		goto done2;
901	}
902	mtx_lock(&msq_mtx);
903	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
904
905	/*
906	 * Validate the message type
907	 */
908
909	if (msghdr->msg_type < 1) {
910		msg_freehdr(msghdr);
911		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
912		wakeup(msqkptr);
913		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
914		error = EINVAL;
915		goto done2;
916	}
917
918	/*
919	 * Copy in the message body
920	 */
921
922	next = msghdr->msg_spot;
923	while (msgsz > 0) {
924		size_t tlen;
925		if (msgsz > msginfo.msgssz)
926			tlen = msginfo.msgssz;
927		else
928			tlen = msgsz;
929		if (next <= -1)
930			panic("next too low #2");
931		if (next >= msginfo.msgseg)
932			panic("next out of range #2");
933		mtx_unlock(&msq_mtx);
934		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
935		    tlen)) != 0) {
936			mtx_lock(&msq_mtx);
937			DPRINTF(("error %d copying in message segment\n",
938			    error));
939			msg_freehdr(msghdr);
940			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
941			wakeup(msqkptr);
942			goto done2;
943		}
944		mtx_lock(&msq_mtx);
945		msgsz -= tlen;
946		user_msgp = (const char *)user_msgp + tlen;
947		next = msgmaps[next].next;
948	}
949	if (next != -1)
950		panic("didn't use all the msg segments");
951
952	/*
953	 * We've got the message.  Unlock the msqid_ds.
954	 */
955
956	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
957
958	/*
959	 * Make sure that the msqid_ds is still allocated.
960	 */
961
962	if (msqkptr->u.msg_qbytes == 0) {
963		msg_freehdr(msghdr);
964		wakeup(msqkptr);
965		error = EIDRM;
966		goto done2;
967	}
968
969#ifdef MAC
970	/*
971	 * Note: Since the task/thread allocates the msghdr and usually
972	 * primes it with its own MAC label, for a majority of policies, it
973	 * won't be necessary to check whether the msghdr has access
974	 * permissions to the msgq.  The mac_check_sysv_msqsnd check would
975	 * suffice in that case.  However, this hook may be required where
976	 * individual policies derive a non-identical label for the msghdr
977	 * from the current thread label and may want to check the msghdr
978	 * enqueue permissions, along with read/write permissions to the
979	 * msgq.
980	 */
981	error = mac_check_sysv_msgmsq(td->td_ucred, msghdr, msqkptr);
982	if (error != 0) {
983		MPRINTF(("mac_check_sysv_msqmsq returned %d\n", error));
984		msg_freehdr(msghdr);
985		wakeup(msqkptr);
986		goto done2;
987	}
988#endif
989
990	/*
991	 * Put the message into the queue
992	 */
993	if (msqkptr->u.msg_first == NULL) {
994		msqkptr->u.msg_first = msghdr;
995		msqkptr->u.msg_last = msghdr;
996	} else {
997		msqkptr->u.msg_last->msg_next = msghdr;
998		msqkptr->u.msg_last = msghdr;
999	}
1000	msqkptr->u.msg_last->msg_next = NULL;
1001
1002	msqkptr->u.msg_cbytes += msghdr->msg_ts;
1003	msqkptr->u.msg_qnum++;
1004	msqkptr->u.msg_lspid = td->td_proc->p_pid;
1005	msqkptr->u.msg_stime = time_second;
1006
1007	wakeup(msqkptr);
1008	td->td_retval[0] = 0;
1009done2:
1010	mtx_unlock(&msq_mtx);
1011	return (error);
1012}
1013
1014#ifndef _SYS_SYSPROTO_H_
1015struct msgrcv_args {
1016	int	msqid;
1017	void	*msgp;
1018	size_t	msgsz;
1019	long	msgtyp;
1020	int	msgflg;
1021};
1022#endif
1023
1024/*
1025 * MPSAFE
1026 */
1027int
1028msgrcv(td, uap)
1029	struct thread *td;
1030	register struct msgrcv_args *uap;
1031{
1032	int msqid = uap->msqid;
1033	void *user_msgp = uap->msgp;
1034	size_t msgsz = uap->msgsz;
1035	long msgtyp = uap->msgtyp;
1036	int msgflg = uap->msgflg;
1037	size_t len;
1038	register struct msqid_kernel *msqkptr;
1039	register struct msg *msghdr;
1040	int error = 0;
1041	short next;
1042
1043	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
1044	    msgsz, msgtyp, msgflg));
1045
1046	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
1047		return (ENOSYS);
1048
1049	msqid = IPCID_TO_IX(msqid);
1050
1051	if (msqid < 0 || msqid >= msginfo.msgmni) {
1052		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
1053		    msginfo.msgmni));
1054		return (EINVAL);
1055	}
1056
1057	msqkptr = &msqids[msqid];
1058	mtx_lock(&msq_mtx);
1059	if (msqkptr->u.msg_qbytes == 0) {
1060		DPRINTF(("no such message queue id\n"));
1061		error = EINVAL;
1062		goto done2;
1063	}
1064	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1065		DPRINTF(("wrong sequence number\n"));
1066		error = EINVAL;
1067		goto done2;
1068	}
1069
1070	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1071		DPRINTF(("requester doesn't have read access\n"));
1072		goto done2;
1073	}
1074
1075#ifdef MAC
1076	error = mac_check_sysv_msqrcv(td->td_ucred, msqkptr);
1077	if (error != 0) {
1078		MPRINTF(("mac_check_sysv_msqrcv returned %d\n", error));
1079		goto done2;
1080	}
1081#endif
1082
1083	msghdr = NULL;
1084	while (msghdr == NULL) {
1085		if (msgtyp == 0) {
1086			msghdr = msqkptr->u.msg_first;
1087			if (msghdr != NULL) {
1088				if (msgsz < msghdr->msg_ts &&
1089				    (msgflg & MSG_NOERROR) == 0) {
1090					DPRINTF(("first message on the queue "
1091					    "is too big (want %d, got %d)\n",
1092					    msgsz, msghdr->msg_ts));
1093					error = E2BIG;
1094					goto done2;
1095				}
1096#ifdef MAC
1097				error = mac_check_sysv_msgrcv(td->td_ucred,
1098				    msghdr);
1099				if (error != 0) {
1100					MPRINTF(("mac_check_sysv_msgrcv "
1101					    "returned %d\n", error));
1102					goto done2;
1103				}
1104#endif
1105				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1106					msqkptr->u.msg_first = NULL;
1107					msqkptr->u.msg_last = NULL;
1108				} else {
1109					msqkptr->u.msg_first = msghdr->msg_next;
1110					if (msqkptr->u.msg_first == NULL)
1111						panic("msg_first/last screwed up #1");
1112				}
1113			}
1114		} else {
1115			struct msg *previous;
1116			struct msg **prev;
1117
1118			previous = NULL;
1119			prev = &(msqkptr->u.msg_first);
1120			while ((msghdr = *prev) != NULL) {
1121				/*
1122				 * Is this message's type an exact match or is
1123				 * this message's type less than or equal to
1124				 * the absolute value of a negative msgtyp?
1125				 * Note that the second half of this test can
1126				 * NEVER be true if msgtyp is positive since
1127				 * msg_type is always positive!
1128				 */
1129
1130				if (msgtyp == msghdr->msg_type ||
1131				    msghdr->msg_type <= -msgtyp) {
1132					DPRINTF(("found message type %d, "
1133					    "requested %d\n",
1134					    msghdr->msg_type, msgtyp));
1135					if (msgsz < msghdr->msg_ts &&
1136					    (msgflg & MSG_NOERROR) == 0) {
1137						DPRINTF(("requested message "
1138						    "on the queue is too big "
1139						    "(want %d, got %d)\n",
1140						    msgsz, msghdr->msg_ts));
1141						error = E2BIG;
1142						goto done2;
1143					}
1144#ifdef MAC
1145					error = mac_check_sysv_msgrcv(
1146					    td->td_ucred, msghdr);
1147					if (error != 0) {
1148						MPRINTF(("mac_check_sysv_"
1149						    "msgrcv returned %d\n",
1150						    error));
1151						goto done2;
1152					}
1153#endif
1154					*prev = msghdr->msg_next;
1155					if (msghdr == msqkptr->u.msg_last) {
1156						if (previous == NULL) {
1157							if (prev !=
1158							    &msqkptr->u.msg_first)
1159								panic("msg_first/last screwed up #2");
1160							msqkptr->u.msg_first =
1161							    NULL;
1162							msqkptr->u.msg_last =
1163							    NULL;
1164						} else {
1165							if (prev ==
1166							    &msqkptr->u.msg_first)
1167								panic("msg_first/last screwed up #3");
1168							msqkptr->u.msg_last =
1169							    previous;
1170						}
1171					}
1172					break;
1173				}
1174				previous = msghdr;
1175				prev = &(msghdr->msg_next);
1176			}
1177		}
1178
1179		/*
1180		 * We've either extracted the msghdr for the appropriate
1181		 * message or there isn't one.
1182		 * If there is one then bail out of this loop.
1183		 */
1184
1185		if (msghdr != NULL)
1186			break;
1187
1188		/*
1189		 * Hmph!  No message found.  Does the user want to wait?
1190		 */
1191
1192		if ((msgflg & IPC_NOWAIT) != 0) {
1193			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1194			    msgtyp));
1195			/* The SVID says to return ENOMSG. */
1196			error = ENOMSG;
1197			goto done2;
1198		}
1199
1200		/*
1201		 * Wait for something to happen
1202		 */
1203
1204		DPRINTF(("msgrcv:  goodnight\n"));
1205		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1206		    "msgwait", 0);
1207		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1208
1209		if (error != 0) {
1210			DPRINTF(("msgsnd:  interrupted system call\n"));
1211			error = EINTR;
1212			goto done2;
1213		}
1214
1215		/*
1216		 * Make sure that the msq queue still exists
1217		 */
1218
1219		if (msqkptr->u.msg_qbytes == 0 ||
1220		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1221			DPRINTF(("msqid deleted\n"));
1222			error = EIDRM;
1223			goto done2;
1224		}
1225	}
1226
1227	/*
1228	 * Return the message to the user.
1229	 *
1230	 * First, do the bookkeeping (before we risk being interrupted).
1231	 */
1232
1233	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1234	msqkptr->u.msg_qnum--;
1235	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1236	msqkptr->u.msg_rtime = time_second;
1237
1238	/*
1239	 * Make msgsz the actual amount that we'll be returning.
1240	 * Note that this effectively truncates the message if it is too long
1241	 * (since msgsz is never increased).
1242	 */
1243
1244	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1245	    msghdr->msg_ts));
1246	if (msgsz > msghdr->msg_ts)
1247		msgsz = msghdr->msg_ts;
1248
1249	/*
1250	 * Return the type to the user.
1251	 */
1252
1253	mtx_unlock(&msq_mtx);
1254	error = copyout(&(msghdr->msg_type), user_msgp,
1255	    sizeof(msghdr->msg_type));
1256	mtx_lock(&msq_mtx);
1257	if (error != 0) {
1258		DPRINTF(("error (%d) copying out message type\n", error));
1259		msg_freehdr(msghdr);
1260		wakeup(msqkptr);
1261		goto done2;
1262	}
1263	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1264
1265	/*
1266	 * Return the segments to the user
1267	 */
1268
1269	next = msghdr->msg_spot;
1270	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1271		size_t tlen;
1272
1273		if (msgsz - len > msginfo.msgssz)
1274			tlen = msginfo.msgssz;
1275		else
1276			tlen = msgsz - len;
1277		if (next <= -1)
1278			panic("next too low #3");
1279		if (next >= msginfo.msgseg)
1280			panic("next out of range #3");
1281		mtx_unlock(&msq_mtx);
1282		error = copyout(&msgpool[next * msginfo.msgssz],
1283		    user_msgp, tlen);
1284		mtx_lock(&msq_mtx);
1285		if (error != 0) {
1286			DPRINTF(("error (%d) copying out message segment\n",
1287			    error));
1288			msg_freehdr(msghdr);
1289			wakeup(msqkptr);
1290			goto done2;
1291		}
1292		user_msgp = (char *)user_msgp + tlen;
1293		next = msgmaps[next].next;
1294	}
1295
1296	/*
1297	 * Done, return the actual number of bytes copied out.
1298	 */
1299
1300	msg_freehdr(msghdr);
1301	wakeup(msqkptr);
1302	td->td_retval[0] = msgsz;
1303done2:
1304	mtx_unlock(&msq_mtx);
1305	return (error);
1306}
1307
1308static int
1309sysctl_msqids(SYSCTL_HANDLER_ARGS)
1310{
1311
1312	return (SYSCTL_OUT(req, msqids,
1313	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1314}
1315
1316SYSCTL_DECL(_kern_ipc);
1317SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1318SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
1319SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0, "");
1320SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0, "");
1321SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
1322SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
1323SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1324    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1325