sysv_msg.c revision 164368
/*-
 * Implementation of SVID messages
 *
 * Author:  Daniel Boulet
 *
 * Copyright 1993 Daniel Boulet and RTMX Inc.
 *
 * This system call was implemented by Daniel Boulet under contract from RTMX.
 *
 * Redistribution and use in source forms, with and without modification,
 * are permitted provided that this entire comment appears intact.
 *
 * Redistribution in binary form may occur without any restrictions.
 * Obviously, it would be nice if you gave credit where credit is due
 * but requiring it would be too onerous.
 *
 * This software is provided ``AS IS'' without any warranties of any kind.
 */
/*-
 * Copyright (c) 2003-2005 McAfee, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project in part by McAfee
 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
 * program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
49217703Sjhb
50217703Sjhb#include <sys/cdefs.h>
51217703Sjhb__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 164368 2006-11-17 20:43:01Z jkim $");
52202283Slulf
53202283Slulf#include "opt_sysvipc.h"
54202283Slulf#include "opt_mac.h"
55202283Slulf
56202283Slulf#include <sys/param.h>
57202283Slulf#include <sys/systm.h>
58202283Slulf#include <sys/sysproto.h>
59202283Slulf#include <sys/kernel.h>
60202283Slulf#include <sys/priv.h>
61202283Slulf#include <sys/proc.h>
62202283Slulf#include <sys/lock.h>
63202283Slulf#include <sys/mutex.h>
64202283Slulf#include <sys/module.h>
65202283Slulf#include <sys/msg.h>
66202283Slulf#include <sys/syscall.h>
67202283Slulf#include <sys/syscallsubr.h>
68202283Slulf#include <sys/sysent.h>
69221126Sjhb#include <sys/sysctl.h>
70221126Sjhb#include <sys/malloc.h>
71221126Sjhb#include <sys/jail.h>
72221126Sjhb
73221126Sjhb#include <security/mac/mac_framework.h>
74221126Sjhb
75221126Sjhbstatic MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
76221126Sjhb
77221126Sjhbstatic void msginit(void);
78221126Sjhbstatic int msgunload(void);
79221126Sjhbstatic int sysvmsg_modload(struct module *, int, void *);
80221126Sjhb
81221126Sjhb#ifdef MSG_DEBUG
82221126Sjhb#define DPRINTF(a)	printf a
83221126Sjhb#else
84221126Sjhb#define DPRINTF(a)
85221126Sjhb#endif
86221126Sjhb
87221126Sjhbstatic void msg_freehdr(struct msg *msghdr);
88221126Sjhb
89221126Sjhb/* XXX casting to (sy_call_t *) is bogus, as usual. */
90221126Sjhbstatic sy_call_t *msgcalls[] = {
91221126Sjhb	(sy_call_t *)msgctl, (sy_call_t *)msgget,
92202283Slulf	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
93202283Slulf};
94217585Sjhb
/*
 * Compile-time defaults for the message limits.  Each one (except the
 * derived MSGMAX) can be overridden by defining the macro beforehand.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 */
#endif
/* Operands are parenthesised so that expression-valued overrides work. */
#define MSGMAX	((MSGSSZ) * (MSGSEG))
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
111
112/*
113 * Based on the configuration parameters described in an SVR2 (yes, two)
114 * config(1m) man page.
115 *
116 * Each message is broken up and stored in segments that are msgssz bytes
117 * long.  For efficiency reasons, this should be a power of two.  Also,
118 * it doesn't make sense if it is less than 8 or greater than about 256.
119 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
120 * two between 8 and 1024 inclusive (and panic's if it isn't).
121 */
122struct msginfo msginfo = {
123                MSGMAX,         /* max chars in a message */
124                MSGMNI,         /* # of message queue identifiers */
125                MSGMNB,         /* max chars in a queue */
126                MSGTQL,         /* max messages in system */
127                MSGSSZ,         /* size of a message segment */
128                		/* (must be small power of 2 greater than 4) */
129                MSGSEG          /* number of message segments */
130};
131
/*
 * macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 *
 * An id carries the slot index in its low 16 bits and the slot's
 * sequence number in the next 16 bits.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
139
/*
 * The rest of this file is specific to this particular implementation.
 */

/*
 * One link of a singly linked chain of message segments.  Entry i
 * corresponds to segment i of the message pool; chains are threaded
 * through the "next" indices.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> available */
				/* 0..(MSGSEG-1) -> index of next segment */
};
149
150#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
151
152static int nfree_msgmaps;	/* # of free map entries */
153static short free_msgmaps;	/* head of linked list of free map entries */
154static struct msg *free_msghdrs;/* list of free msg headers */
155static char *msgpool;		/* MSGMAX byte long msg buffer pool */
156static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
157static struct msg *msghdrs;	/* MSGTQL msg headers */
158static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
159static struct mtx msq_mtx;	/* global mutex for message queues. */
160
161static void
162msginit()
163{
164	register int i;
165
166	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
167	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
168	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
169	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
170	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
171	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
172
173	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
174	if (msgpool == NULL)
175		panic("msgpool is NULL");
176	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
177	if (msgmaps == NULL)
178		panic("msgmaps is NULL");
179	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
180	if (msghdrs == NULL)
181		panic("msghdrs is NULL");
182	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
183	    M_WAITOK);
184	if (msqids == NULL)
185		panic("msqids is NULL");
186
187	/*
188	 * msginfo.msgssz should be a power of two for efficiency reasons.
189	 * It is also pretty silly if msginfo.msgssz is less than 8
190	 * or greater than about 256 so ...
191	 */
192
193	i = 8;
194	while (i < 1024 && i != msginfo.msgssz)
195		i <<= 1;
196    	if (i != msginfo.msgssz) {
197		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
198		    msginfo.msgssz));
199		panic("msginfo.msgssz not a small power of 2");
200	}
201
202	if (msginfo.msgseg > 32767) {
203		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
204		panic("msginfo.msgseg > 32767");
205	}
206
207	if (msgmaps == NULL)
208		panic("msgmaps is NULL");
209
210	for (i = 0; i < msginfo.msgseg; i++) {
211		if (i > 0)
212			msgmaps[i-1].next = i;
213		msgmaps[i].next = -1;	/* implies entry is available */
214	}
215	free_msgmaps = 0;
216	nfree_msgmaps = msginfo.msgseg;
217
218	if (msghdrs == NULL)
219		panic("msghdrs is NULL");
220
221	for (i = 0; i < msginfo.msgtql; i++) {
222		msghdrs[i].msg_type = 0;
223		if (i > 0)
224			msghdrs[i-1].msg_next = &msghdrs[i];
225		msghdrs[i].msg_next = NULL;
226#ifdef MAC
227		mac_init_sysv_msgmsg(&msghdrs[i]);
228#endif
229    	}
230	free_msghdrs = &msghdrs[0];
231
232	if (msqids == NULL)
233		panic("msqids is NULL");
234
235	for (i = 0; i < msginfo.msgmni; i++) {
236		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
237		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
238		msqids[i].u.msg_perm.mode = 0;
239#ifdef MAC
240		mac_init_sysv_msgqueue(&msqids[i]);
241#endif
242	}
243	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
244}
245
246static int
247msgunload()
248{
249	struct msqid_kernel *msqkptr;
250	int msqid;
251#ifdef MAC
252	int i;
253#endif
254
255	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
256		/*
257		 * Look for an unallocated and unlocked msqid_ds.
258		 * msqid_ds's can be locked by msgsnd or msgrcv while
259		 * they are copying the message in/out.  We can't
260		 * re-use the entry until they release it.
261		 */
262		msqkptr = &msqids[msqid];
263		if (msqkptr->u.msg_qbytes != 0 ||
264		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
265			break;
266	}
267	if (msqid != msginfo.msgmni)
268		return (EBUSY);
269
270#ifdef MAC
271	for (i = 0; i < msginfo.msgtql; i++)
272		mac_destroy_sysv_msgmsg(&msghdrs[i]);
273	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
274		mac_destroy_sysv_msgqueue(&msqids[msqid]);
275#endif
276	free(msgpool, M_MSG);
277	free(msgmaps, M_MSG);
278	free(msghdrs, M_MSG);
279	free(msqids, M_MSG);
280	mtx_destroy(&msq_mtx);
281	return (0);
282}
283
284
285static int
286sysvmsg_modload(struct module *module, int cmd, void *arg)
287{
288	int error = 0;
289
290	switch (cmd) {
291	case MOD_LOAD:
292		msginit();
293		break;
294	case MOD_UNLOAD:
295		error = msgunload();
296		break;
297	case MOD_SHUTDOWN:
298		break;
299	default:
300		error = EINVAL;
301		break;
302	}
303	return (error);
304}
305
306static moduledata_t sysvmsg_mod = {
307	"sysvmsg",
308	&sysvmsg_modload,
309	NULL
310};
311
312SYSCALL_MODULE_HELPER(msgsys);
313SYSCALL_MODULE_HELPER(msgctl);
314SYSCALL_MODULE_HELPER(msgget);
315SYSCALL_MODULE_HELPER(msgsnd);
316SYSCALL_MODULE_HELPER(msgrcv);
317
318DECLARE_MODULE(sysvmsg, sysvmsg_mod,
319	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
320MODULE_VERSION(sysvmsg, 1);
321
322/*
323 * Entry point for all MSG calls
324 *
325 * MPSAFE
326 */
327int
328msgsys(td, uap)
329	struct thread *td;
330	/* XXX actually varargs. */
331	struct msgsys_args /* {
332		int	which;
333		int	a2;
334		int	a3;
335		int	a4;
336		int	a5;
337		int	a6;
338	} */ *uap;
339{
340	int error;
341
342	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
343		return (ENOSYS);
344	if (uap->which < 0 ||
345	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
346		return (EINVAL);
347	error = (*msgcalls[uap->which])(td, &uap->a2);
348	return (error);
349}
350
351static void
352msg_freehdr(msghdr)
353	struct msg *msghdr;
354{
355	while (msghdr->msg_ts > 0) {
356		short next;
357		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
358			panic("msghdr->msg_spot out of range");
359		next = msgmaps[msghdr->msg_spot].next;
360		msgmaps[msghdr->msg_spot].next = free_msgmaps;
361		free_msgmaps = msghdr->msg_spot;
362		nfree_msgmaps++;
363		msghdr->msg_spot = next;
364		if (msghdr->msg_ts >= msginfo.msgssz)
365			msghdr->msg_ts -= msginfo.msgssz;
366		else
367			msghdr->msg_ts = 0;
368	}
369	if (msghdr->msg_spot != -1)
370		panic("msghdr->msg_spot != -1");
371	msghdr->msg_next = free_msghdrs;
372	free_msghdrs = msghdr;
373#ifdef MAC
374	mac_cleanup_sysv_msgmsg(msghdr);
375#endif
376}
377
378#ifndef _SYS_SYSPROTO_H_
379struct msgctl_args {
380	int	msqid;
381	int	cmd;
382	struct	msqid_ds *buf;
383};
384#endif
385
386/*
387 * MPSAFE
388 */
389int
390msgctl(td, uap)
391	struct thread *td;
392	register struct msgctl_args *uap;
393{
394	int msqid = uap->msqid;
395	int cmd = uap->cmd;
396	struct msqid_ds msqbuf;
397	int error;
398
399	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, uap->buf));
400	if (cmd == IPC_SET &&
401	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
402		return (error);
403	error = kern_msgctl(td, msqid, cmd, &msqbuf);
404	if (cmd == IPC_STAT && error == 0)
405		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
406	return (error);
407}
408
409int
410kern_msgctl(td, msqid, cmd, msqbuf)
411	struct thread *td;
412	int msqid;
413	int cmd;
414	struct msqid_ds *msqbuf;
415{
416	int rval, error, msqix;
417	register struct msqid_kernel *msqkptr;
418
419	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
420		return (ENOSYS);
421
422	msqix = IPCID_TO_IX(msqid);
423
424	if (msqix < 0 || msqix >= msginfo.msgmni) {
425		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
426		    msginfo.msgmni));
427		return (EINVAL);
428	}
429
430	msqkptr = &msqids[msqix];
431
432	mtx_lock(&msq_mtx);
433	if (msqkptr->u.msg_qbytes == 0) {
434		DPRINTF(("no such msqid\n"));
435		error = EINVAL;
436		goto done2;
437	}
438	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
439		DPRINTF(("wrong sequence number\n"));
440		error = EINVAL;
441		goto done2;
442	}
443#ifdef MAC
444	error = mac_check_sysv_msqctl(td->td_ucred, msqkptr, cmd);
445	if (error != 0)
446		goto done2;
447#endif
448
449	error = 0;
450	rval = 0;
451
452	switch (cmd) {
453
454	case IPC_RMID:
455	{
456		struct msg *msghdr;
457		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
458			goto done2;
459
460#ifdef MAC
461		/*
462		 * Check that the thread has MAC access permissions to
463		 * individual msghdrs.  Note: We need to do this in a
464		 * separate loop because the actual loop alters the
465		 * msq/msghdr info as it progresses, and there is no going
466		 * back if half the way through we discover that the
467		 * thread cannot free a certain msghdr.  The msq will get
468		 * into an inconsistent state.
469		 */
470		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
471		    msghdr = msghdr->msg_next) {
472			error = mac_check_sysv_msgrmid(td->td_ucred, msghdr);
473			if (error != 0)
474				goto done2;
475		}
476#endif
477
478		/* Free the message headers */
479		msghdr = msqkptr->u.msg_first;
480		while (msghdr != NULL) {
481			struct msg *msghdr_tmp;
482
483			/* Free the segments of each message */
484			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
485			msqkptr->u.msg_qnum--;
486			msghdr_tmp = msghdr;
487			msghdr = msghdr->msg_next;
488			msg_freehdr(msghdr_tmp);
489		}
490
491		if (msqkptr->u.msg_cbytes != 0)
492			panic("msg_cbytes is screwed up");
493		if (msqkptr->u.msg_qnum != 0)
494			panic("msg_qnum is screwed up");
495
496		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
497
498#ifdef MAC
499		mac_cleanup_sysv_msgqueue(msqkptr);
500#endif
501
502		wakeup(msqkptr);
503	}
504
505		break;
506
507	case IPC_SET:
508		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
509			goto done2;
510		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
511			error = priv_check(td, PRIV_IPC_MSGSIZE);
512			if (error)
513				goto done2;
514		}
515		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
516			DPRINTF(("can't increase msg_qbytes beyond %d"
517			    "(truncating)\n", msginfo.msgmnb));
518			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
519		}
520		if (msqbuf->msg_qbytes == 0) {
521			DPRINTF(("can't reduce msg_qbytes to 0\n"));
522			error = EINVAL;		/* non-standard errno! */
523			goto done2;
524		}
525		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
526		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
527		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
528		    (msqbuf->msg_perm.mode & 0777);
529		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
530		msqkptr->u.msg_ctime = time_second;
531		break;
532
533	case IPC_STAT:
534		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
535			DPRINTF(("requester doesn't have read access\n"));
536			goto done2;
537		}
538		*msqbuf = msqkptr->u;
539		break;
540
541	default:
542		DPRINTF(("invalid command %d\n", cmd));
543		error = EINVAL;
544		goto done2;
545	}
546
547	if (error == 0)
548		td->td_retval[0] = rval;
549done2:
550	mtx_unlock(&msq_mtx);
551	return (error);
552}
553
554#ifndef _SYS_SYSPROTO_H_
555struct msgget_args {
556	key_t	key;
557	int	msgflg;
558};
559#endif
560
561/*
562 * MPSAFE
563 */
564int
565msgget(td, uap)
566	struct thread *td;
567	register struct msgget_args *uap;
568{
569	int msqid, error = 0;
570	int key = uap->key;
571	int msgflg = uap->msgflg;
572	struct ucred *cred = td->td_ucred;
573	register struct msqid_kernel *msqkptr = NULL;
574
575	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
576
577	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
578		return (ENOSYS);
579
580	mtx_lock(&msq_mtx);
581	if (key != IPC_PRIVATE) {
582		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
583			msqkptr = &msqids[msqid];
584			if (msqkptr->u.msg_qbytes != 0 &&
585			    msqkptr->u.msg_perm.key == key)
586				break;
587		}
588		if (msqid < msginfo.msgmni) {
589			DPRINTF(("found public key\n"));
590			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
591				DPRINTF(("not exclusive\n"));
592				error = EEXIST;
593				goto done2;
594			}
595			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
596			    msgflg & 0700))) {
597				DPRINTF(("requester doesn't have 0%o access\n",
598				    msgflg & 0700));
599				goto done2;
600			}
601#ifdef MAC
602			error = mac_check_sysv_msqget(cred, msqkptr);
603			if (error != 0)
604				goto done2;
605#endif
606			goto found;
607		}
608	}
609
610	DPRINTF(("need to allocate the msqid_ds\n"));
611	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
612		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
613			/*
614			 * Look for an unallocated and unlocked msqid_ds.
615			 * msqid_ds's can be locked by msgsnd or msgrcv while
616			 * they are copying the message in/out.  We can't
617			 * re-use the entry until they release it.
618			 */
619			msqkptr = &msqids[msqid];
620			if (msqkptr->u.msg_qbytes == 0 &&
621			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
622				break;
623		}
624		if (msqid == msginfo.msgmni) {
625			DPRINTF(("no more msqid_ds's available\n"));
626			error = ENOSPC;
627			goto done2;
628		}
629		DPRINTF(("msqid %d is available\n", msqid));
630		msqkptr->u.msg_perm.key = key;
631		msqkptr->u.msg_perm.cuid = cred->cr_uid;
632		msqkptr->u.msg_perm.uid = cred->cr_uid;
633		msqkptr->u.msg_perm.cgid = cred->cr_gid;
634		msqkptr->u.msg_perm.gid = cred->cr_gid;
635		msqkptr->u.msg_perm.mode = (msgflg & 0777);
636		/* Make sure that the returned msqid is unique */
637		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
638		msqkptr->u.msg_first = NULL;
639		msqkptr->u.msg_last = NULL;
640		msqkptr->u.msg_cbytes = 0;
641		msqkptr->u.msg_qnum = 0;
642		msqkptr->u.msg_qbytes = msginfo.msgmnb;
643		msqkptr->u.msg_lspid = 0;
644		msqkptr->u.msg_lrpid = 0;
645		msqkptr->u.msg_stime = 0;
646		msqkptr->u.msg_rtime = 0;
647		msqkptr->u.msg_ctime = time_second;
648#ifdef MAC
649		mac_create_sysv_msgqueue(cred, msqkptr);
650#endif
651	} else {
652		DPRINTF(("didn't find it and wasn't asked to create it\n"));
653		error = ENOENT;
654		goto done2;
655	}
656
657found:
658	/* Construct the unique msqid */
659	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
660done2:
661	mtx_unlock(&msq_mtx);
662	return (error);
663}
664
665#ifndef _SYS_SYSPROTO_H_
666struct msgsnd_args {
667	int	msqid;
668	const void	*msgp;
669	size_t	msgsz;
670	int	msgflg;
671};
672#endif
673
674/*
675 * MPSAFE
676 */
677int
678msgsnd(td, uap)
679	struct thread *td;
680	register struct msgsnd_args *uap;
681{
682	int msqid = uap->msqid;
683	const void *user_msgp = uap->msgp;
684	size_t msgsz = uap->msgsz;
685	int msgflg = uap->msgflg;
686	int segs_needed, error = 0;
687	register struct msqid_kernel *msqkptr;
688	register struct msg *msghdr;
689	short next;
690
691	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
692	    msgflg));
693	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
694		return (ENOSYS);
695
696	mtx_lock(&msq_mtx);
697	msqid = IPCID_TO_IX(msqid);
698
699	if (msqid < 0 || msqid >= msginfo.msgmni) {
700		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
701		    msginfo.msgmni));
702		error = EINVAL;
703		goto done2;
704	}
705
706	msqkptr = &msqids[msqid];
707	if (msqkptr->u.msg_qbytes == 0) {
708		DPRINTF(("no such message queue id\n"));
709		error = EINVAL;
710		goto done2;
711	}
712	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
713		DPRINTF(("wrong sequence number\n"));
714		error = EINVAL;
715		goto done2;
716	}
717
718	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
719		DPRINTF(("requester doesn't have write access\n"));
720		goto done2;
721	}
722
723#ifdef MAC
724	error = mac_check_sysv_msqsnd(td->td_ucred, msqkptr);
725	if (error != 0)
726		goto done2;
727#endif
728
729	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
730	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
731	    segs_needed));
732	for (;;) {
733		int need_more_resources = 0;
734
735		/*
736		 * check msgsz
737		 * (inside this loop in case msg_qbytes changes while we sleep)
738		 */
739
740		if (msgsz > msqkptr->u.msg_qbytes) {
741			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
742			error = EINVAL;
743			goto done2;
744		}
745
746		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
747			DPRINTF(("msqid is locked\n"));
748			need_more_resources = 1;
749		}
750		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
751			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
752			need_more_resources = 1;
753		}
754		if (segs_needed > nfree_msgmaps) {
755			DPRINTF(("segs_needed > nfree_msgmaps\n"));
756			need_more_resources = 1;
757		}
758		if (free_msghdrs == NULL) {
759			DPRINTF(("no more msghdrs\n"));
760			need_more_resources = 1;
761		}
762
763		if (need_more_resources) {
764			int we_own_it;
765
766			if ((msgflg & IPC_NOWAIT) != 0) {
767				DPRINTF(("need more resources but caller "
768				    "doesn't want to wait\n"));
769				error = EAGAIN;
770				goto done2;
771			}
772
773			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
774				DPRINTF(("we don't own the msqid_ds\n"));
775				we_own_it = 0;
776			} else {
777				/* Force later arrivals to wait for our
778				   request */
779				DPRINTF(("we own the msqid_ds\n"));
780				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
781				we_own_it = 1;
782			}
783			DPRINTF(("msgsnd:  goodnight\n"));
784			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
785			    "msgsnd", hz);
786			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
787			if (we_own_it)
788				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
789			if (error == EWOULDBLOCK) {
790				DPRINTF(("msgsnd:  timed out\n"));
791				continue;
792			}
793			if (error != 0) {
794				DPRINTF(("msgsnd:  interrupted system call\n"));
795				error = EINTR;
796				goto done2;
797			}
798
799			/*
800			 * Make sure that the msq queue still exists
801			 */
802
803			if (msqkptr->u.msg_qbytes == 0) {
804				DPRINTF(("msqid deleted\n"));
805				error = EIDRM;
806				goto done2;
807			}
808
809		} else {
810			DPRINTF(("got all the resources that we need\n"));
811			break;
812		}
813	}
814
815	/*
816	 * We have the resources that we need.
817	 * Make sure!
818	 */
819
820	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
821		panic("msg_perm.mode & MSG_LOCKED");
822	if (segs_needed > nfree_msgmaps)
823		panic("segs_needed > nfree_msgmaps");
824	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
825		panic("msgsz + msg_cbytes > msg_qbytes");
826	if (free_msghdrs == NULL)
827		panic("no more msghdrs");
828
829	/*
830	 * Re-lock the msqid_ds in case we page-fault when copying in the
831	 * message
832	 */
833
834	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
835		panic("msqid_ds is already locked");
836	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
837
838	/*
839	 * Allocate a message header
840	 */
841
842	msghdr = free_msghdrs;
843	free_msghdrs = msghdr->msg_next;
844	msghdr->msg_spot = -1;
845	msghdr->msg_ts = msgsz;
846#ifdef MAC
847	/*
848	 * XXXMAC: Should the mac_check_sysv_msgmsq check follow here
849	 * immediately?  Or, should it be checked just before the msg is
850	 * enqueued in the msgq (as it is done now)?
851	 */
852	mac_create_sysv_msgmsg(td->td_ucred, msqkptr, msghdr);
853#endif
854
855	/*
856	 * Allocate space for the message
857	 */
858
859	while (segs_needed > 0) {
860		if (nfree_msgmaps <= 0)
861			panic("not enough msgmaps");
862		if (free_msgmaps == -1)
863			panic("nil free_msgmaps");
864		next = free_msgmaps;
865		if (next <= -1)
866			panic("next too low #1");
867		if (next >= msginfo.msgseg)
868			panic("next out of range #1");
869		DPRINTF(("allocating segment %d to message\n", next));
870		free_msgmaps = msgmaps[next].next;
871		nfree_msgmaps--;
872		msgmaps[next].next = msghdr->msg_spot;
873		msghdr->msg_spot = next;
874		segs_needed--;
875	}
876
877	/*
878	 * Copy in the message type
879	 */
880
881	mtx_unlock(&msq_mtx);
882	if ((error = copyin(user_msgp, &msghdr->msg_type,
883	    sizeof(msghdr->msg_type))) != 0) {
884		mtx_lock(&msq_mtx);
885		DPRINTF(("error %d copying the message type\n", error));
886		msg_freehdr(msghdr);
887		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
888		wakeup(msqkptr);
889		goto done2;
890	}
891	mtx_lock(&msq_mtx);
892	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
893
894	/*
895	 * Validate the message type
896	 */
897
898	if (msghdr->msg_type < 1) {
899		msg_freehdr(msghdr);
900		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
901		wakeup(msqkptr);
902		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
903		error = EINVAL;
904		goto done2;
905	}
906
907	/*
908	 * Copy in the message body
909	 */
910
911	next = msghdr->msg_spot;
912	while (msgsz > 0) {
913		size_t tlen;
914		if (msgsz > msginfo.msgssz)
915			tlen = msginfo.msgssz;
916		else
917			tlen = msgsz;
918		if (next <= -1)
919			panic("next too low #2");
920		if (next >= msginfo.msgseg)
921			panic("next out of range #2");
922		mtx_unlock(&msq_mtx);
923		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
924		    tlen)) != 0) {
925			mtx_lock(&msq_mtx);
926			DPRINTF(("error %d copying in message segment\n",
927			    error));
928			msg_freehdr(msghdr);
929			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
930			wakeup(msqkptr);
931			goto done2;
932		}
933		mtx_lock(&msq_mtx);
934		msgsz -= tlen;
935		user_msgp = (const char *)user_msgp + tlen;
936		next = msgmaps[next].next;
937	}
938	if (next != -1)
939		panic("didn't use all the msg segments");
940
941	/*
942	 * We've got the message.  Unlock the msqid_ds.
943	 */
944
945	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
946
947	/*
948	 * Make sure that the msqid_ds is still allocated.
949	 */
950
951	if (msqkptr->u.msg_qbytes == 0) {
952		msg_freehdr(msghdr);
953		wakeup(msqkptr);
954		error = EIDRM;
955		goto done2;
956	}
957
958#ifdef MAC
959	/*
960	 * Note: Since the task/thread allocates the msghdr and usually
961	 * primes it with its own MAC label, for a majority of policies, it
962	 * won't be necessary to check whether the msghdr has access
963	 * permissions to the msgq.  The mac_check_sysv_msqsnd check would
964	 * suffice in that case.  However, this hook may be required where
965	 * individual policies derive a non-identical label for the msghdr
966	 * from the current thread label and may want to check the msghdr
967	 * enqueue permissions, along with read/write permissions to the
968	 * msgq.
969	 */
970	error = mac_check_sysv_msgmsq(td->td_ucred, msghdr, msqkptr);
971	if (error != 0) {
972		msg_freehdr(msghdr);
973		wakeup(msqkptr);
974		goto done2;
975	}
976#endif
977
978	/*
979	 * Put the message into the queue
980	 */
981	if (msqkptr->u.msg_first == NULL) {
982		msqkptr->u.msg_first = msghdr;
983		msqkptr->u.msg_last = msghdr;
984	} else {
985		msqkptr->u.msg_last->msg_next = msghdr;
986		msqkptr->u.msg_last = msghdr;
987	}
988	msqkptr->u.msg_last->msg_next = NULL;
989
990	msqkptr->u.msg_cbytes += msghdr->msg_ts;
991	msqkptr->u.msg_qnum++;
992	msqkptr->u.msg_lspid = td->td_proc->p_pid;
993	msqkptr->u.msg_stime = time_second;
994
995	wakeup(msqkptr);
996	td->td_retval[0] = 0;
997done2:
998	mtx_unlock(&msq_mtx);
999	return (error);
1000}
1001
/*
 * Argument layout for msgrcv; only defined here when <sys/sysproto.h>
 * has not already supplied it.
 */
#ifndef _SYS_SYSPROTO_H_
struct msgrcv_args {
	int	msqid;
	void	*msgp;
	size_t	msgsz;
	long	msgtyp;
	int	msgflg;
};
#endif
1011
1012/*
1013 * MPSAFE
1014 */
1015int
1016msgrcv(td, uap)
1017	struct thread *td;
1018	register struct msgrcv_args *uap;
1019{
1020	int msqid = uap->msqid;
1021	void *user_msgp = uap->msgp;
1022	size_t msgsz = uap->msgsz;
1023	long msgtyp = uap->msgtyp;
1024	int msgflg = uap->msgflg;
1025	size_t len;
1026	register struct msqid_kernel *msqkptr;
1027	register struct msg *msghdr;
1028	int error = 0;
1029	short next;
1030
1031	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
1032	    msgsz, msgtyp, msgflg));
1033
1034	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
1035		return (ENOSYS);
1036
1037	msqid = IPCID_TO_IX(msqid);
1038
1039	if (msqid < 0 || msqid >= msginfo.msgmni) {
1040		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
1041		    msginfo.msgmni));
1042		return (EINVAL);
1043	}
1044
1045	msqkptr = &msqids[msqid];
1046	mtx_lock(&msq_mtx);
1047	if (msqkptr->u.msg_qbytes == 0) {
1048		DPRINTF(("no such message queue id\n"));
1049		error = EINVAL;
1050		goto done2;
1051	}
1052	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1053		DPRINTF(("wrong sequence number\n"));
1054		error = EINVAL;
1055		goto done2;
1056	}
1057
1058	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1059		DPRINTF(("requester doesn't have read access\n"));
1060		goto done2;
1061	}
1062
1063#ifdef MAC
1064	error = mac_check_sysv_msqrcv(td->td_ucred, msqkptr);
1065	if (error != 0)
1066		goto done2;
1067#endif
1068
1069	msghdr = NULL;
1070	while (msghdr == NULL) {
1071		if (msgtyp == 0) {
1072			msghdr = msqkptr->u.msg_first;
1073			if (msghdr != NULL) {
1074				if (msgsz < msghdr->msg_ts &&
1075				    (msgflg & MSG_NOERROR) == 0) {
1076					DPRINTF(("first message on the queue "
1077					    "is too big (want %d, got %d)\n",
1078					    msgsz, msghdr->msg_ts));
1079					error = E2BIG;
1080					goto done2;
1081				}
1082#ifdef MAC
1083				error = mac_check_sysv_msgrcv(td->td_ucred,
1084				    msghdr);
1085				if (error != 0)
1086					goto done2;
1087#endif
1088				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1089					msqkptr->u.msg_first = NULL;
1090					msqkptr->u.msg_last = NULL;
1091				} else {
1092					msqkptr->u.msg_first = msghdr->msg_next;
1093					if (msqkptr->u.msg_first == NULL)
1094						panic("msg_first/last screwed up #1");
1095				}
1096			}
1097		} else {
1098			struct msg *previous;
1099			struct msg **prev;
1100
1101			previous = NULL;
1102			prev = &(msqkptr->u.msg_first);
1103			while ((msghdr = *prev) != NULL) {
1104				/*
1105				 * Is this message's type an exact match or is
1106				 * this message's type less than or equal to
1107				 * the absolute value of a negative msgtyp?
1108				 * Note that the second half of this test can
1109				 * NEVER be true if msgtyp is positive since
1110				 * msg_type is always positive!
1111				 */
1112
1113				if (msgtyp == msghdr->msg_type ||
1114				    msghdr->msg_type <= -msgtyp) {
1115					DPRINTF(("found message type %d, "
1116					    "requested %d\n",
1117					    msghdr->msg_type, msgtyp));
1118					if (msgsz < msghdr->msg_ts &&
1119					    (msgflg & MSG_NOERROR) == 0) {
1120						DPRINTF(("requested message "
1121						    "on the queue is too big "
1122						    "(want %d, got %d)\n",
1123						    msgsz, msghdr->msg_ts));
1124						error = E2BIG;
1125						goto done2;
1126					}
1127#ifdef MAC
1128					error = mac_check_sysv_msgrcv(
1129					    td->td_ucred, msghdr);
1130					if (error != 0)
1131						goto done2;
1132#endif
1133					*prev = msghdr->msg_next;
1134					if (msghdr == msqkptr->u.msg_last) {
1135						if (previous == NULL) {
1136							if (prev !=
1137							    &msqkptr->u.msg_first)
1138								panic("msg_first/last screwed up #2");
1139							msqkptr->u.msg_first =
1140							    NULL;
1141							msqkptr->u.msg_last =
1142							    NULL;
1143						} else {
1144							if (prev ==
1145							    &msqkptr->u.msg_first)
1146								panic("msg_first/last screwed up #3");
1147							msqkptr->u.msg_last =
1148							    previous;
1149						}
1150					}
1151					break;
1152				}
1153				previous = msghdr;
1154				prev = &(msghdr->msg_next);
1155			}
1156		}
1157
1158		/*
1159		 * We've either extracted the msghdr for the appropriate
1160		 * message or there isn't one.
1161		 * If there is one then bail out of this loop.
1162		 */
1163
1164		if (msghdr != NULL)
1165			break;
1166
1167		/*
1168		 * Hmph!  No message found.  Does the user want to wait?
1169		 */
1170
1171		if ((msgflg & IPC_NOWAIT) != 0) {
1172			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1173			    msgtyp));
1174			/* The SVID says to return ENOMSG. */
1175			error = ENOMSG;
1176			goto done2;
1177		}
1178
1179		/*
1180		 * Wait for something to happen
1181		 */
1182
1183		DPRINTF(("msgrcv:  goodnight\n"));
1184		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1185		    "msgrcv", 0);
1186		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1187
1188		if (error != 0) {
1189			DPRINTF(("msgrcv:  interrupted system call\n"));
1190			error = EINTR;
1191			goto done2;
1192		}
1193
1194		/*
1195		 * Make sure that the msq queue still exists
1196		 */
1197
1198		if (msqkptr->u.msg_qbytes == 0 ||
1199		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1200			DPRINTF(("msqid deleted\n"));
1201			error = EIDRM;
1202			goto done2;
1203		}
1204	}
1205
1206	/*
1207	 * Return the message to the user.
1208	 *
1209	 * First, do the bookkeeping (before we risk being interrupted).
1210	 */
1211
1212	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1213	msqkptr->u.msg_qnum--;
1214	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1215	msqkptr->u.msg_rtime = time_second;
1216
1217	/*
1218	 * Make msgsz the actual amount that we'll be returning.
1219	 * Note that this effectively truncates the message if it is too long
1220	 * (since msgsz is never increased).
1221	 */
1222
1223	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1224	    msghdr->msg_ts));
1225	if (msgsz > msghdr->msg_ts)
1226		msgsz = msghdr->msg_ts;
1227
1228	/*
1229	 * Return the type to the user.
1230	 */
1231
1232	mtx_unlock(&msq_mtx);
1233	error = copyout(&(msghdr->msg_type), user_msgp,
1234	    sizeof(msghdr->msg_type));
1235	mtx_lock(&msq_mtx);
1236	if (error != 0) {
1237		DPRINTF(("error (%d) copying out message type\n", error));
1238		msg_freehdr(msghdr);
1239		wakeup(msqkptr);
1240		goto done2;
1241	}
1242	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1243
1244	/*
1245	 * Return the segments to the user
1246	 */
1247
1248	next = msghdr->msg_spot;
1249	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1250		size_t tlen;
1251
1252		if (msgsz - len > msginfo.msgssz)
1253			tlen = msginfo.msgssz;
1254		else
1255			tlen = msgsz - len;
1256		if (next <= -1)
1257			panic("next too low #3");
1258		if (next >= msginfo.msgseg)
1259			panic("next out of range #3");
1260		mtx_unlock(&msq_mtx);
1261		error = copyout(&msgpool[next * msginfo.msgssz],
1262		    user_msgp, tlen);
1263		mtx_lock(&msq_mtx);
1264		if (error != 0) {
1265			DPRINTF(("error (%d) copying out message segment\n",
1266			    error));
1267			msg_freehdr(msghdr);
1268			wakeup(msqkptr);
1269			goto done2;
1270		}
1271		user_msgp = (char *)user_msgp + tlen;
1272		next = msgmaps[next].next;
1273	}
1274
1275	/*
1276	 * Done, return the actual number of bytes copied out.
1277	 */
1278
1279	msg_freehdr(msghdr);
1280	wakeup(msqkptr);
1281	td->td_retval[0] = msgsz;
1282done2:
1283	mtx_unlock(&msq_mtx);
1284	return (error);
1285}
1286
1287static int
1288sysctl_msqids(SYSCTL_HANDLER_ARGS)
1289{
1290
1291	return (SYSCTL_OUT(req, msqids,
1292	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1293}
1294
1295SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1296    "Maximum message size");
1297SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1298    "Number of message queue identifiers");
1299SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1300    "Maximum number of bytes in a queue");
1301SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1302    "Maximum number of messages in the system");
1303SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1304    "Size of a message segment");
1305SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1306    "Number of message segments");
1307SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1308    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1309