sysv_msg.c revision 194575
1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 194575 2009-06-21 07:54:47Z rdivacky $");
52
53#include "opt_sysvipc.h"
54
55#include <sys/param.h>
56#include <sys/systm.h>
57#include <sys/sysproto.h>
58#include <sys/kernel.h>
59#include <sys/priv.h>
60#include <sys/proc.h>
61#include <sys/lock.h>
62#include <sys/mutex.h>
63#include <sys/module.h>
64#include <sys/msg.h>
65#include <sys/syscall.h>
66#include <sys/syscallsubr.h>
67#include <sys/sysent.h>
68#include <sys/sysctl.h>
69#include <sys/malloc.h>
70#include <sys/jail.h>
71
72#include <security/mac/mac_framework.h>
73
/* Malloc type used for the tables that msginit() allocates. */
static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");

/* Module event handling; the definitions follow later in this file. */
static void msginit(void);
static int msgunload(void);
static int sysvmsg_modload(struct module *, int, void *);

/*
 * Debug tracing.  DPRINTF takes one fully parenthesized printf argument
 * list, e.g. DPRINTF(("x=%d\n", x)).  It prints only when MSG_DEBUG is
 * defined and is otherwise a no-op expression.
 */
#ifdef MSG_DEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)	(void)0
#endif

/* Returns a message header and the segments it owns to the free lists. */
static void msg_freehdr(struct msg *msghdr);

/*
 * Dispatch table used by msgsys(): the "which" argument selects the
 * entry (0 = msgctl, 1 = msgget, 2 = msgsnd, 3 = msgrcv).
 */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)msgctl, (sy_call_t *)msgget,
	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
};
93
/*
 * Compile-time defaults for the message queue limits.  Each value (other
 * than the derived MSGMAX) can be overridden by defining the macro before
 * this point.  The values seed the msginfo structure below; msginit()
 * additionally lets the kern.ipc.* tunables replace them.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must not exceed 32767 (checked in msginit) */
#endif
#define MSGMAX	(MSGSSZ*MSGSEG)	/* total bytes in the segment pool */
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max # of messages in the system */
#endif
110
111/*
112 * Based on the configuration parameters described in an SVR2 (yes, two)
113 * config(1m) man page.
114 *
115 * Each message is broken up and stored in segments that are msgssz bytes
116 * long.  For efficiency reasons, this should be a power of two.  Also,
117 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
120 */
121struct msginfo msginfo = {
122                MSGMAX,         /* max chars in a message */
123                MSGMNI,         /* # of message queue identifiers */
124                MSGMNB,         /* max chars in a queue */
125                MSGTQL,         /* max messages in system */
126                MSGSSZ,         /* size of a message segment */
127                		/* (must be small power of 2 greater than 4) */
128                MSGSEG          /* number of message segments */
129};
130
131/*
132 * macros to convert between msqid_ds's and msqid's.
133 * (specific to this implementation)
134 */
/* Build an id: sequence number in the upper 16 bits, index in the lower. */
#define MSQID(ix,ds)	\
	((((ds).msg_perm.seq << 16) & 0xffff0000) | ((ix) & 0xffff))
/* Extract the table index (low 16 bits) from an id. */
#define MSQID_IX(id)	((id) & 0xffff)
/* Extract the sequence number (bits 16..31) from an id. */
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
138
139/*
140 * The rest of this file is specific to this particular implementation.
141 */
142
/*
 * One entry per message segment.  Entries are chained through "next",
 * either on the free list headed by free_msgmaps or on the chain of
 * segments belonging to one message (headed by that message's msg_spot).
 */
struct msgmap {
	short	next;		/* next segment in buffer */
				/* -1 -> end of chain (set for every */
				/*       entry before it is linked) */
				/* 0..(MSGSEG-1) -> index of next segment */
};
148
/*
 * Flag kept in msg_perm.mode (outside the 0777 permission bits) while a
 * sender is waiting for resources or copying a message in; msgget() and
 * msgunload() treat a queue slot with this bit set as busy.
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* All of the following are set up by msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
static struct msg *msghdrs;	/* MSGTQL msg headers */
static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
static struct mtx msq_mtx;	/* global mutex for message queues. */
159
160static void
161msginit()
162{
163	register int i;
164
165	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
166	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
167	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
168	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
169	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
170	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
171
172	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
173	if (msgpool == NULL)
174		panic("msgpool is NULL");
175	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
176	if (msgmaps == NULL)
177		panic("msgmaps is NULL");
178	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
179	if (msghdrs == NULL)
180		panic("msghdrs is NULL");
181	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
182	    M_WAITOK);
183	if (msqids == NULL)
184		panic("msqids is NULL");
185
186	/*
187	 * msginfo.msgssz should be a power of two for efficiency reasons.
188	 * It is also pretty silly if msginfo.msgssz is less than 8
189	 * or greater than about 256 so ...
190	 */
191
192	i = 8;
193	while (i < 1024 && i != msginfo.msgssz)
194		i <<= 1;
195    	if (i != msginfo.msgssz) {
196		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
197		    msginfo.msgssz));
198		panic("msginfo.msgssz not a small power of 2");
199	}
200
201	if (msginfo.msgseg > 32767) {
202		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
203		panic("msginfo.msgseg > 32767");
204	}
205
206	if (msgmaps == NULL)
207		panic("msgmaps is NULL");
208
209	for (i = 0; i < msginfo.msgseg; i++) {
210		if (i > 0)
211			msgmaps[i-1].next = i;
212		msgmaps[i].next = -1;	/* implies entry is available */
213	}
214	free_msgmaps = 0;
215	nfree_msgmaps = msginfo.msgseg;
216
217	if (msghdrs == NULL)
218		panic("msghdrs is NULL");
219
220	for (i = 0; i < msginfo.msgtql; i++) {
221		msghdrs[i].msg_type = 0;
222		if (i > 0)
223			msghdrs[i-1].msg_next = &msghdrs[i];
224		msghdrs[i].msg_next = NULL;
225#ifdef MAC
226		mac_sysvmsg_init(&msghdrs[i]);
227#endif
228    	}
229	free_msghdrs = &msghdrs[0];
230
231	if (msqids == NULL)
232		panic("msqids is NULL");
233
234	for (i = 0; i < msginfo.msgmni; i++) {
235		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
236		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
237		msqids[i].u.msg_perm.mode = 0;
238#ifdef MAC
239		mac_sysvmsq_init(&msqids[i]);
240#endif
241	}
242	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
243}
244
245static int
246msgunload()
247{
248	struct msqid_kernel *msqkptr;
249	int msqid;
250#ifdef MAC
251	int i;
252#endif
253
254	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
255		/*
256		 * Look for an unallocated and unlocked msqid_ds.
257		 * msqid_ds's can be locked by msgsnd or msgrcv while
258		 * they are copying the message in/out.  We can't
259		 * re-use the entry until they release it.
260		 */
261		msqkptr = &msqids[msqid];
262		if (msqkptr->u.msg_qbytes != 0 ||
263		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
264			break;
265	}
266	if (msqid != msginfo.msgmni)
267		return (EBUSY);
268
269#ifdef MAC
270	for (i = 0; i < msginfo.msgtql; i++)
271		mac_sysvmsg_destroy(&msghdrs[i]);
272	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
273		mac_sysvmsq_destroy(&msqids[msqid]);
274#endif
275	free(msgpool, M_MSG);
276	free(msgmaps, M_MSG);
277	free(msghdrs, M_MSG);
278	free(msqids, M_MSG);
279	mtx_destroy(&msq_mtx);
280	return (0);
281}
282
283
284static int
285sysvmsg_modload(struct module *module, int cmd, void *arg)
286{
287	int error = 0;
288
289	switch (cmd) {
290	case MOD_LOAD:
291		msginit();
292		break;
293	case MOD_UNLOAD:
294		error = msgunload();
295		break;
296	case MOD_SHUTDOWN:
297		break;
298	default:
299		error = EINVAL;
300		break;
301	}
302	return (error);
303}
304
/* Module description: name, event handler, and no extra argument. */
static moduledata_t sysvmsg_mod = {
	"sysvmsg",
	&sysvmsg_modload,
	NULL
};

/* One helper per system call implemented in this file. */
SYSCALL_MODULE_HELPER(msgsys);
SYSCALL_MODULE_HELPER(msgctl);
SYSCALL_MODULE_HELPER(msgget);
SYSCALL_MODULE_HELPER(msgsnd);
SYSCALL_MODULE_HELPER(msgrcv);

/* Register the module at the SI_SUB_SYSV_MSG stage and give it version 1. */
DECLARE_MODULE(sysvmsg, sysvmsg_mod,
	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
MODULE_VERSION(sysvmsg, 1);
320
321/*
322 * Entry point for all MSG calls.
323 */
324int
325msgsys(td, uap)
326	struct thread *td;
327	/* XXX actually varargs. */
328	struct msgsys_args /* {
329		int	which;
330		int	a2;
331		int	a3;
332		int	a4;
333		int	a5;
334		int	a6;
335	} */ *uap;
336{
337	int error;
338
339	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
340		return (ENOSYS);
341	if (uap->which < 0 ||
342	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
343		return (EINVAL);
344	error = (*msgcalls[uap->which])(td, &uap->a2);
345	return (error);
346}
347
348static void
349msg_freehdr(msghdr)
350	struct msg *msghdr;
351{
352	while (msghdr->msg_ts > 0) {
353		short next;
354		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
355			panic("msghdr->msg_spot out of range");
356		next = msgmaps[msghdr->msg_spot].next;
357		msgmaps[msghdr->msg_spot].next = free_msgmaps;
358		free_msgmaps = msghdr->msg_spot;
359		nfree_msgmaps++;
360		msghdr->msg_spot = next;
361		if (msghdr->msg_ts >= msginfo.msgssz)
362			msghdr->msg_ts -= msginfo.msgssz;
363		else
364			msghdr->msg_ts = 0;
365	}
366	if (msghdr->msg_spot != -1)
367		panic("msghdr->msg_spot != -1");
368	msghdr->msg_next = free_msghdrs;
369	free_msghdrs = msghdr;
370#ifdef MAC
371	mac_sysvmsg_cleanup(msghdr);
372#endif
373}
374
375#ifndef _SYS_SYSPROTO_H_
376struct msgctl_args {
377	int	msqid;
378	int	cmd;
379	struct	msqid_ds *buf;
380};
381#endif
382int
383msgctl(td, uap)
384	struct thread *td;
385	register struct msgctl_args *uap;
386{
387	int msqid = uap->msqid;
388	int cmd = uap->cmd;
389	struct msqid_ds msqbuf;
390	int error;
391
392	DPRINTF(("call to msgctl(%d, %d, %p)\n", msqid, cmd, uap->buf));
393	if (cmd == IPC_SET &&
394	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
395		return (error);
396	error = kern_msgctl(td, msqid, cmd, &msqbuf);
397	if (cmd == IPC_STAT && error == 0)
398		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
399	return (error);
400}
401
402int
403kern_msgctl(td, msqid, cmd, msqbuf)
404	struct thread *td;
405	int msqid;
406	int cmd;
407	struct msqid_ds *msqbuf;
408{
409	int rval, error, msqix;
410	register struct msqid_kernel *msqkptr;
411
412	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
413		return (ENOSYS);
414
415	msqix = IPCID_TO_IX(msqid);
416
417	if (msqix < 0 || msqix >= msginfo.msgmni) {
418		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
419		    msginfo.msgmni));
420		return (EINVAL);
421	}
422
423	msqkptr = &msqids[msqix];
424
425	mtx_lock(&msq_mtx);
426	if (msqkptr->u.msg_qbytes == 0) {
427		DPRINTF(("no such msqid\n"));
428		error = EINVAL;
429		goto done2;
430	}
431	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
432		DPRINTF(("wrong sequence number\n"));
433		error = EINVAL;
434		goto done2;
435	}
436#ifdef MAC
437	error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
438	if (error != 0)
439		goto done2;
440#endif
441
442	error = 0;
443	rval = 0;
444
445	switch (cmd) {
446
447	case IPC_RMID:
448	{
449		struct msg *msghdr;
450		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
451			goto done2;
452
453#ifdef MAC
454		/*
455		 * Check that the thread has MAC access permissions to
456		 * individual msghdrs.  Note: We need to do this in a
457		 * separate loop because the actual loop alters the
458		 * msq/msghdr info as it progresses, and there is no going
459		 * back if half the way through we discover that the
460		 * thread cannot free a certain msghdr.  The msq will get
461		 * into an inconsistent state.
462		 */
463		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
464		    msghdr = msghdr->msg_next) {
465			error = mac_sysvmsq_check_msgrmid(td->td_ucred, msghdr);
466			if (error != 0)
467				goto done2;
468		}
469#endif
470
471		/* Free the message headers */
472		msghdr = msqkptr->u.msg_first;
473		while (msghdr != NULL) {
474			struct msg *msghdr_tmp;
475
476			/* Free the segments of each message */
477			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
478			msqkptr->u.msg_qnum--;
479			msghdr_tmp = msghdr;
480			msghdr = msghdr->msg_next;
481			msg_freehdr(msghdr_tmp);
482		}
483
484		if (msqkptr->u.msg_cbytes != 0)
485			panic("msg_cbytes is screwed up");
486		if (msqkptr->u.msg_qnum != 0)
487			panic("msg_qnum is screwed up");
488
489		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
490
491#ifdef MAC
492		mac_sysvmsq_cleanup(msqkptr);
493#endif
494
495		wakeup(msqkptr);
496	}
497
498		break;
499
500	case IPC_SET:
501		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
502			goto done2;
503		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
504			error = priv_check(td, PRIV_IPC_MSGSIZE);
505			if (error)
506				goto done2;
507		}
508		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
509			DPRINTF(("can't increase msg_qbytes beyond %d"
510			    "(truncating)\n", msginfo.msgmnb));
511			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
512		}
513		if (msqbuf->msg_qbytes == 0) {
514			DPRINTF(("can't reduce msg_qbytes to 0\n"));
515			error = EINVAL;		/* non-standard errno! */
516			goto done2;
517		}
518		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
519		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
520		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
521		    (msqbuf->msg_perm.mode & 0777);
522		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
523		msqkptr->u.msg_ctime = time_second;
524		break;
525
526	case IPC_STAT:
527		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
528			DPRINTF(("requester doesn't have read access\n"));
529			goto done2;
530		}
531		*msqbuf = msqkptr->u;
532		break;
533
534	default:
535		DPRINTF(("invalid command %d\n", cmd));
536		error = EINVAL;
537		goto done2;
538	}
539
540	if (error == 0)
541		td->td_retval[0] = rval;
542done2:
543	mtx_unlock(&msq_mtx);
544	return (error);
545}
546
547#ifndef _SYS_SYSPROTO_H_
548struct msgget_args {
549	key_t	key;
550	int	msgflg;
551};
552#endif
553int
554msgget(td, uap)
555	struct thread *td;
556	register struct msgget_args *uap;
557{
558	int msqid, error = 0;
559	int key = uap->key;
560	int msgflg = uap->msgflg;
561	struct ucred *cred = td->td_ucred;
562	register struct msqid_kernel *msqkptr = NULL;
563
564	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
565
566	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
567		return (ENOSYS);
568
569	mtx_lock(&msq_mtx);
570	if (key != IPC_PRIVATE) {
571		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
572			msqkptr = &msqids[msqid];
573			if (msqkptr->u.msg_qbytes != 0 &&
574			    msqkptr->u.msg_perm.key == key)
575				break;
576		}
577		if (msqid < msginfo.msgmni) {
578			DPRINTF(("found public key\n"));
579			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
580				DPRINTF(("not exclusive\n"));
581				error = EEXIST;
582				goto done2;
583			}
584			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
585			    msgflg & 0700))) {
586				DPRINTF(("requester doesn't have 0%o access\n",
587				    msgflg & 0700));
588				goto done2;
589			}
590#ifdef MAC
591			error = mac_sysvmsq_check_msqget(cred, msqkptr);
592			if (error != 0)
593				goto done2;
594#endif
595			goto found;
596		}
597	}
598
599	DPRINTF(("need to allocate the msqid_ds\n"));
600	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
601		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
602			/*
603			 * Look for an unallocated and unlocked msqid_ds.
604			 * msqid_ds's can be locked by msgsnd or msgrcv while
605			 * they are copying the message in/out.  We can't
606			 * re-use the entry until they release it.
607			 */
608			msqkptr = &msqids[msqid];
609			if (msqkptr->u.msg_qbytes == 0 &&
610			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
611				break;
612		}
613		if (msqid == msginfo.msgmni) {
614			DPRINTF(("no more msqid_ds's available\n"));
615			error = ENOSPC;
616			goto done2;
617		}
618		DPRINTF(("msqid %d is available\n", msqid));
619		msqkptr->u.msg_perm.key = key;
620		msqkptr->u.msg_perm.cuid = cred->cr_uid;
621		msqkptr->u.msg_perm.uid = cred->cr_uid;
622		msqkptr->u.msg_perm.cgid = cred->cr_gid;
623		msqkptr->u.msg_perm.gid = cred->cr_gid;
624		msqkptr->u.msg_perm.mode = (msgflg & 0777);
625		/* Make sure that the returned msqid is unique */
626		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
627		msqkptr->u.msg_first = NULL;
628		msqkptr->u.msg_last = NULL;
629		msqkptr->u.msg_cbytes = 0;
630		msqkptr->u.msg_qnum = 0;
631		msqkptr->u.msg_qbytes = msginfo.msgmnb;
632		msqkptr->u.msg_lspid = 0;
633		msqkptr->u.msg_lrpid = 0;
634		msqkptr->u.msg_stime = 0;
635		msqkptr->u.msg_rtime = 0;
636		msqkptr->u.msg_ctime = time_second;
637#ifdef MAC
638		mac_sysvmsq_create(cred, msqkptr);
639#endif
640	} else {
641		DPRINTF(("didn't find it and wasn't asked to create it\n"));
642		error = ENOENT;
643		goto done2;
644	}
645
646found:
647	/* Construct the unique msqid */
648	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
649done2:
650	mtx_unlock(&msq_mtx);
651	return (error);
652}
653
654#ifndef _SYS_SYSPROTO_H_
655struct msgsnd_args {
656	int	msqid;
657	const void	*msgp;
658	size_t	msgsz;
659	int	msgflg;
660};
661#endif
662int
663kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
664	struct thread *td;
665	int msqid;
666	const void *msgp;	/* XXX msgp is actually mtext. */
667	size_t msgsz;
668	int msgflg;
669	long mtype;
670{
671	int msqix, segs_needed, error = 0;
672	register struct msqid_kernel *msqkptr;
673	register struct msg *msghdr;
674	short next;
675
676	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
677		return (ENOSYS);
678
679	mtx_lock(&msq_mtx);
680	msqix = IPCID_TO_IX(msqid);
681
682	if (msqix < 0 || msqix >= msginfo.msgmni) {
683		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
684		    msginfo.msgmni));
685		error = EINVAL;
686		goto done2;
687	}
688
689	msqkptr = &msqids[msqix];
690	if (msqkptr->u.msg_qbytes == 0) {
691		DPRINTF(("no such message queue id\n"));
692		error = EINVAL;
693		goto done2;
694	}
695	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
696		DPRINTF(("wrong sequence number\n"));
697		error = EINVAL;
698		goto done2;
699	}
700
701	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
702		DPRINTF(("requester doesn't have write access\n"));
703		goto done2;
704	}
705
706#ifdef MAC
707	error = mac_sysvmsq_check_msqsnd(td->td_ucred, msqkptr);
708	if (error != 0)
709		goto done2;
710#endif
711
712	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
713	DPRINTF(("msgsz=%zu, msgssz=%d, segs_needed=%d\n", msgsz,
714	    msginfo.msgssz, segs_needed));
715	for (;;) {
716		int need_more_resources = 0;
717
718		/*
719		 * check msgsz
720		 * (inside this loop in case msg_qbytes changes while we sleep)
721		 */
722
723		if (msgsz > msqkptr->u.msg_qbytes) {
724			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
725			error = EINVAL;
726			goto done2;
727		}
728
729		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
730			DPRINTF(("msqid is locked\n"));
731			need_more_resources = 1;
732		}
733		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
734			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
735			need_more_resources = 1;
736		}
737		if (segs_needed > nfree_msgmaps) {
738			DPRINTF(("segs_needed > nfree_msgmaps\n"));
739			need_more_resources = 1;
740		}
741		if (free_msghdrs == NULL) {
742			DPRINTF(("no more msghdrs\n"));
743			need_more_resources = 1;
744		}
745
746		if (need_more_resources) {
747			int we_own_it;
748
749			if ((msgflg & IPC_NOWAIT) != 0) {
750				DPRINTF(("need more resources but caller "
751				    "doesn't want to wait\n"));
752				error = EAGAIN;
753				goto done2;
754			}
755
756			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
757				DPRINTF(("we don't own the msqid_ds\n"));
758				we_own_it = 0;
759			} else {
760				/* Force later arrivals to wait for our
761				   request */
762				DPRINTF(("we own the msqid_ds\n"));
763				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
764				we_own_it = 1;
765			}
766			DPRINTF(("msgsnd:  goodnight\n"));
767			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
768			    "msgsnd", hz);
769			DPRINTF(("msgsnd:  good morning, error=%d\n", error));
770			if (we_own_it)
771				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
772			if (error == EWOULDBLOCK) {
773				DPRINTF(("msgsnd:  timed out\n"));
774				continue;
775			}
776			if (error != 0) {
777				DPRINTF(("msgsnd:  interrupted system call\n"));
778				error = EINTR;
779				goto done2;
780			}
781
782			/*
783			 * Make sure that the msq queue still exists
784			 */
785
786			if (msqkptr->u.msg_qbytes == 0) {
787				DPRINTF(("msqid deleted\n"));
788				error = EIDRM;
789				goto done2;
790			}
791
792		} else {
793			DPRINTF(("got all the resources that we need\n"));
794			break;
795		}
796	}
797
798	/*
799	 * We have the resources that we need.
800	 * Make sure!
801	 */
802
803	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
804		panic("msg_perm.mode & MSG_LOCKED");
805	if (segs_needed > nfree_msgmaps)
806		panic("segs_needed > nfree_msgmaps");
807	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
808		panic("msgsz + msg_cbytes > msg_qbytes");
809	if (free_msghdrs == NULL)
810		panic("no more msghdrs");
811
812	/*
813	 * Re-lock the msqid_ds in case we page-fault when copying in the
814	 * message
815	 */
816
817	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
818		panic("msqid_ds is already locked");
819	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
820
821	/*
822	 * Allocate a message header
823	 */
824
825	msghdr = free_msghdrs;
826	free_msghdrs = msghdr->msg_next;
827	msghdr->msg_spot = -1;
828	msghdr->msg_ts = msgsz;
829	msghdr->msg_type = mtype;
830#ifdef MAC
831	/*
832	 * XXXMAC: Should the mac_sysvmsq_check_msgmsq check follow here
833	 * immediately?  Or, should it be checked just before the msg is
834	 * enqueued in the msgq (as it is done now)?
835	 */
836	mac_sysvmsg_create(td->td_ucred, msqkptr, msghdr);
837#endif
838
839	/*
840	 * Allocate space for the message
841	 */
842
843	while (segs_needed > 0) {
844		if (nfree_msgmaps <= 0)
845			panic("not enough msgmaps");
846		if (free_msgmaps == -1)
847			panic("nil free_msgmaps");
848		next = free_msgmaps;
849		if (next <= -1)
850			panic("next too low #1");
851		if (next >= msginfo.msgseg)
852			panic("next out of range #1");
853		DPRINTF(("allocating segment %d to message\n", next));
854		free_msgmaps = msgmaps[next].next;
855		nfree_msgmaps--;
856		msgmaps[next].next = msghdr->msg_spot;
857		msghdr->msg_spot = next;
858		segs_needed--;
859	}
860
861	/*
862	 * Validate the message type
863	 */
864
865	if (msghdr->msg_type < 1) {
866		msg_freehdr(msghdr);
867		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
868		wakeup(msqkptr);
869		DPRINTF(("mtype (%ld) < 1\n", msghdr->msg_type));
870		error = EINVAL;
871		goto done2;
872	}
873
874	/*
875	 * Copy in the message body
876	 */
877
878	next = msghdr->msg_spot;
879	while (msgsz > 0) {
880		size_t tlen;
881		if (msgsz > msginfo.msgssz)
882			tlen = msginfo.msgssz;
883		else
884			tlen = msgsz;
885		if (next <= -1)
886			panic("next too low #2");
887		if (next >= msginfo.msgseg)
888			panic("next out of range #2");
889		mtx_unlock(&msq_mtx);
890		if ((error = copyin(msgp, &msgpool[next * msginfo.msgssz],
891		    tlen)) != 0) {
892			mtx_lock(&msq_mtx);
893			DPRINTF(("error %d copying in message segment\n",
894			    error));
895			msg_freehdr(msghdr);
896			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
897			wakeup(msqkptr);
898			goto done2;
899		}
900		mtx_lock(&msq_mtx);
901		msgsz -= tlen;
902		msgp = (const char *)msgp + tlen;
903		next = msgmaps[next].next;
904	}
905	if (next != -1)
906		panic("didn't use all the msg segments");
907
908	/*
909	 * We've got the message.  Unlock the msqid_ds.
910	 */
911
912	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
913
914	/*
915	 * Make sure that the msqid_ds is still allocated.
916	 */
917
918	if (msqkptr->u.msg_qbytes == 0) {
919		msg_freehdr(msghdr);
920		wakeup(msqkptr);
921		error = EIDRM;
922		goto done2;
923	}
924
925#ifdef MAC
926	/*
927	 * Note: Since the task/thread allocates the msghdr and usually
928	 * primes it with its own MAC label, for a majority of policies, it
929	 * won't be necessary to check whether the msghdr has access
930	 * permissions to the msgq.  The mac_sysvmsq_check_msqsnd check would
931	 * suffice in that case.  However, this hook may be required where
932	 * individual policies derive a non-identical label for the msghdr
933	 * from the current thread label and may want to check the msghdr
934	 * enqueue permissions, along with read/write permissions to the
935	 * msgq.
936	 */
937	error = mac_sysvmsq_check_msgmsq(td->td_ucred, msghdr, msqkptr);
938	if (error != 0) {
939		msg_freehdr(msghdr);
940		wakeup(msqkptr);
941		goto done2;
942	}
943#endif
944
945	/*
946	 * Put the message into the queue
947	 */
948	if (msqkptr->u.msg_first == NULL) {
949		msqkptr->u.msg_first = msghdr;
950		msqkptr->u.msg_last = msghdr;
951	} else {
952		msqkptr->u.msg_last->msg_next = msghdr;
953		msqkptr->u.msg_last = msghdr;
954	}
955	msqkptr->u.msg_last->msg_next = NULL;
956
957	msqkptr->u.msg_cbytes += msghdr->msg_ts;
958	msqkptr->u.msg_qnum++;
959	msqkptr->u.msg_lspid = td->td_proc->p_pid;
960	msqkptr->u.msg_stime = time_second;
961
962	wakeup(msqkptr);
963	td->td_retval[0] = 0;
964done2:
965	mtx_unlock(&msq_mtx);
966	return (error);
967}
968
969int
970msgsnd(td, uap)
971	struct thread *td;
972	register struct msgsnd_args *uap;
973{
974	int error;
975	long mtype;
976
977	DPRINTF(("call to msgsnd(%d, %p, %zu, %d)\n", uap->msqid, uap->msgp,
978	    uap->msgsz, uap->msgflg));
979
980	if ((error = copyin(uap->msgp, &mtype, sizeof(mtype))) != 0) {
981		DPRINTF(("error %d copying the message type\n", error));
982		return (error);
983	}
984	return (kern_msgsnd(td, uap->msqid,
985	    (const char *)uap->msgp + sizeof(mtype),
986	    uap->msgsz, uap->msgflg, mtype));
987}
988
/*
 * Argument layout of the msgrcv system call.  Only compiled when
 * _SYS_SYSPROTO_H_ is not defined, i.e. when <sys/sysproto.h> has not
 * already been included.
 */
#ifndef _SYS_SYSPROTO_H_
struct msgrcv_args {
	int	msqid;		/* queue identifier */
	void	*msgp;		/* user buffer for the message */
	size_t	msgsz;		/* size of the caller's buffer */
	long	msgtyp;		/* requested message type (0 = first message) */
	int	msgflg;		/* IPC_NOWAIT, MSG_NOERROR */
};
#endif
998int
999kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
1000	struct thread *td;
1001	int msqid;
1002	void *msgp;	/* XXX msgp is actually mtext. */
1003	size_t msgsz;
1004	long msgtyp;
1005	int msgflg;
1006	long *mtype;
1007{
1008	size_t len;
1009	register struct msqid_kernel *msqkptr;
1010	register struct msg *msghdr;
1011	int msqix, error = 0;
1012	short next;
1013
1014	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1015		return (ENOSYS);
1016
1017	msqix = IPCID_TO_IX(msqid);
1018
1019	if (msqix < 0 || msqix >= msginfo.msgmni) {
1020		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
1021		    msginfo.msgmni));
1022		return (EINVAL);
1023	}
1024
1025	msqkptr = &msqids[msqix];
1026	mtx_lock(&msq_mtx);
1027	if (msqkptr->u.msg_qbytes == 0) {
1028		DPRINTF(("no such message queue id\n"));
1029		error = EINVAL;
1030		goto done2;
1031	}
1032	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1033		DPRINTF(("wrong sequence number\n"));
1034		error = EINVAL;
1035		goto done2;
1036	}
1037
1038	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1039		DPRINTF(("requester doesn't have read access\n"));
1040		goto done2;
1041	}
1042
1043#ifdef MAC
1044	error = mac_sysvmsq_check_msqrcv(td->td_ucred, msqkptr);
1045	if (error != 0)
1046		goto done2;
1047#endif
1048
1049	msghdr = NULL;
1050	while (msghdr == NULL) {
1051		if (msgtyp == 0) {
1052			msghdr = msqkptr->u.msg_first;
1053			if (msghdr != NULL) {
1054				if (msgsz < msghdr->msg_ts &&
1055				    (msgflg & MSG_NOERROR) == 0) {
1056					DPRINTF(("first message on the queue "
1057					    "is too big (want %zu, got %d)\n",
1058					    msgsz, msghdr->msg_ts));
1059					error = E2BIG;
1060					goto done2;
1061				}
1062#ifdef MAC
1063				error = mac_sysvmsq_check_msgrcv(td->td_ucred,
1064				    msghdr);
1065				if (error != 0)
1066					goto done2;
1067#endif
1068				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1069					msqkptr->u.msg_first = NULL;
1070					msqkptr->u.msg_last = NULL;
1071				} else {
1072					msqkptr->u.msg_first = msghdr->msg_next;
1073					if (msqkptr->u.msg_first == NULL)
1074						panic("msg_first/last screwed up #1");
1075				}
1076			}
1077		} else {
1078			struct msg *previous;
1079			struct msg **prev;
1080
1081			previous = NULL;
1082			prev = &(msqkptr->u.msg_first);
1083			while ((msghdr = *prev) != NULL) {
1084				/*
1085				 * Is this message's type an exact match or is
1086				 * this message's type less than or equal to
1087				 * the absolute value of a negative msgtyp?
1088				 * Note that the second half of this test can
1089				 * NEVER be true if msgtyp is positive since
1090				 * msg_type is always positive!
1091				 */
1092
1093				if (msgtyp == msghdr->msg_type ||
1094				    msghdr->msg_type <= -msgtyp) {
1095					DPRINTF(("found message type %ld, "
1096					    "requested %ld\n",
1097					    msghdr->msg_type, msgtyp));
1098					if (msgsz < msghdr->msg_ts &&
1099					    (msgflg & MSG_NOERROR) == 0) {
1100						DPRINTF(("requested message "
1101						    "on the queue is too big "
1102						    "(want %zu, got %hu)\n",
1103						    msgsz, msghdr->msg_ts));
1104						error = E2BIG;
1105						goto done2;
1106					}
1107#ifdef MAC
1108					error = mac_sysvmsq_check_msgrcv(
1109					    td->td_ucred, msghdr);
1110					if (error != 0)
1111						goto done2;
1112#endif
1113					*prev = msghdr->msg_next;
1114					if (msghdr == msqkptr->u.msg_last) {
1115						if (previous == NULL) {
1116							if (prev !=
1117							    &msqkptr->u.msg_first)
1118								panic("msg_first/last screwed up #2");
1119							msqkptr->u.msg_first =
1120							    NULL;
1121							msqkptr->u.msg_last =
1122							    NULL;
1123						} else {
1124							if (prev ==
1125							    &msqkptr->u.msg_first)
1126								panic("msg_first/last screwed up #3");
1127							msqkptr->u.msg_last =
1128							    previous;
1129						}
1130					}
1131					break;
1132				}
1133				previous = msghdr;
1134				prev = &(msghdr->msg_next);
1135			}
1136		}
1137
1138		/*
1139		 * We've either extracted the msghdr for the appropriate
1140		 * message or there isn't one.
1141		 * If there is one then bail out of this loop.
1142		 */
1143
1144		if (msghdr != NULL)
1145			break;
1146
1147		/*
1148		 * Hmph!  No message found.  Does the user want to wait?
1149		 */
1150
1151		if ((msgflg & IPC_NOWAIT) != 0) {
1152			DPRINTF(("no appropriate message found (msgtyp=%ld)\n",
1153			    msgtyp));
1154			/* The SVID says to return ENOMSG. */
1155			error = ENOMSG;
1156			goto done2;
1157		}
1158
1159		/*
1160		 * Wait for something to happen
1161		 */
1162
1163		DPRINTF(("msgrcv:  goodnight\n"));
1164		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1165		    "msgrcv", 0);
1166		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1167
1168		if (error != 0) {
1169			DPRINTF(("msgrcv:  interrupted system call\n"));
1170			error = EINTR;
1171			goto done2;
1172		}
1173
1174		/*
1175		 * Make sure that the msq queue still exists
1176		 */
1177
1178		if (msqkptr->u.msg_qbytes == 0 ||
1179		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
1180			DPRINTF(("msqid deleted\n"));
1181			error = EIDRM;
1182			goto done2;
1183		}
1184	}
1185
1186	/*
1187	 * Return the message to the user.
1188	 *
1189	 * First, do the bookkeeping (before we risk being interrupted).
1190	 */
1191
1192	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1193	msqkptr->u.msg_qnum--;
1194	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1195	msqkptr->u.msg_rtime = time_second;
1196
1197	/*
1198	 * Make msgsz the actual amount that we'll be returning.
1199	 * Note that this effectively truncates the message if it is too long
1200	 * (since msgsz is never increased).
1201	 */
1202
1203	DPRINTF(("found a message, msgsz=%zu, msg_ts=%hu\n", msgsz,
1204	    msghdr->msg_ts));
1205	if (msgsz > msghdr->msg_ts)
1206		msgsz = msghdr->msg_ts;
1207	*mtype = msghdr->msg_type;
1208
1209	/*
1210	 * Return the segments to the user
1211	 */
1212
1213	next = msghdr->msg_spot;
1214	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1215		size_t tlen;
1216
1217		if (msgsz - len > msginfo.msgssz)
1218			tlen = msginfo.msgssz;
1219		else
1220			tlen = msgsz - len;
1221		if (next <= -1)
1222			panic("next too low #3");
1223		if (next >= msginfo.msgseg)
1224			panic("next out of range #3");
1225		mtx_unlock(&msq_mtx);
1226		error = copyout(&msgpool[next * msginfo.msgssz], msgp, tlen);
1227		mtx_lock(&msq_mtx);
1228		if (error != 0) {
1229			DPRINTF(("error (%d) copying out message segment\n",
1230			    error));
1231			msg_freehdr(msghdr);
1232			wakeup(msqkptr);
1233			goto done2;
1234		}
1235		msgp = (char *)msgp + tlen;
1236		next = msgmaps[next].next;
1237	}
1238
1239	/*
1240	 * Done, return the actual number of bytes copied out.
1241	 */
1242
1243	msg_freehdr(msghdr);
1244	wakeup(msqkptr);
1245	td->td_retval[0] = msgsz;
1246done2:
1247	mtx_unlock(&msq_mtx);
1248	return (error);
1249}
1250
1251int
1252msgrcv(td, uap)
1253	struct thread *td;
1254	register struct msgrcv_args *uap;
1255{
1256	int error;
1257	long mtype;
1258
1259	DPRINTF(("call to msgrcv(%d, %p, %zu, %ld, %d)\n", uap->msqid,
1260	    uap->msgp, uap->msgsz, uap->msgtyp, uap->msgflg));
1261
1262	if ((error = kern_msgrcv(td, uap->msqid,
1263	    (char *)uap->msgp + sizeof(mtype), uap->msgsz,
1264	    uap->msgtyp, uap->msgflg, &mtype)) != 0)
1265		return (error);
1266	if ((error = copyout(&mtype, uap->msgp, sizeof(mtype))) != 0)
1267		DPRINTF(("error %d copying the message type\n", error));
1268	return (error);
1269}
1270
1271static int
1272sysctl_msqids(SYSCTL_HANDLER_ARGS)
1273{
1274
1275	return (SYSCTL_OUT(req, msqids,
1276	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1277}
1278
1279SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1280    "Maximum message size");
1281SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1282    "Number of message queue identifiers");
1283SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1284    "Maximum number of bytes in a queue");
1285SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1286    "Maximum number of messages in the system");
1287SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1288    "Size of a message segment");
1289SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1290    "Number of message segments");
1291SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1292    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1293