sysv_msg.c revision 162468
1/*-
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19/*-
20 * Copyright (c) 2003-2005 McAfee, Inc.
21 * All rights reserved.
22 *
23 * This software was developed for the FreeBSD Project in part by McAfee
24 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
25 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
26 * program.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 *    notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 *    notice, this list of conditions and the following disclaimer in the
35 *    documentation and/or other materials provided with the distribution.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47 * SUCH DAMAGE.
48 */
49
50#include <sys/cdefs.h>
51__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 162468 2006-09-20 13:40:00Z rwatson $");
52
53#include "opt_sysvipc.h"
54#include "opt_mac.h"
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/sysproto.h>
59#include <sys/kernel.h>
60#include <sys/proc.h>
61#include <sys/lock.h>
62#include <sys/mac.h>
63#include <sys/mutex.h>
64#include <sys/module.h>
65#include <sys/msg.h>
66#include <sys/syscall.h>
67#include <sys/syscallsubr.h>
68#include <sys/sysent.h>
69#include <sys/sysctl.h>
70#include <sys/malloc.h>
71#include <sys/jail.h>
72
73static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
74
75static void msginit(void);
76static int msgunload(void);
77static int sysvmsg_modload(struct module *, int, void *);
78
79#ifdef MSG_DEBUG
80#define DPRINTF(a)	printf a
81#else
82#define DPRINTF(a)
83#endif
84
85static void msg_freehdr(struct msg *msghdr);
86
87/* XXX casting to (sy_call_t *) is bogus, as usual. */
88static sy_call_t *msgcalls[] = {
89	(sy_call_t *)msgctl, (sy_call_t *)msgget,
90	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
91};
92
/*
 * Compile-time defaults for the message queue limits.  Each one may be
 * overridden by defining the macro before this point; MSGMAX is always
 * derived from the segment size and the segment count.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif

#define MSGMAX	(MSGSSZ*MSGSEG)

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif

#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
109
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit() in kern/sysv_msg.c checks that msgssz is a power
 * of two between 8 and 1024 inclusive (and panics if it isn't).
 */
120struct msginfo msginfo = {
121                MSGMAX,         /* max chars in a message */
122                MSGMNI,         /* # of message queue identifiers */
123                MSGMNB,         /* max chars in a queue */
124                MSGTQL,         /* max messages in system */
125                MSGSSZ,         /* size of a message segment */
126                		/* (must be small power of 2 greater than 4) */
127                MSGSEG          /* number of message segments */
128};
129
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid carries the table index in its low 16 bits and the descriptor's
 * msg_perm.seq in its high 16 bits.
 */
#define MSQID(ix,ds)	\
	((((ds).msg_perm.seq << 16) & 0xffff0000) | ((ix) & 0xffff))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
137
138/*
139 * The rest of this file is specific to this particular implementation.
140 */
141
142struct msgmap {
143	short	next;		/* next segment in buffer */
144    				/* -1 -> available */
145    				/* 0..(MSGSEG-1) -> index of next segment */
146};
147
148#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
149
150static int nfree_msgmaps;	/* # of free map entries */
151static short free_msgmaps;	/* head of linked list of free map entries */
152static struct msg *free_msghdrs;/* list of free msg headers */
153static char *msgpool;		/* MSGMAX byte long msg buffer pool */
154static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
155static struct msg *msghdrs;	/* MSGTQL msg headers */
156static struct msqid_kernel *msqids;	/* MSGMNI msqid_kernel struct's */
157static struct mtx msq_mtx;	/* global mutex for message queues. */
158
159static void
160msginit()
161{
162	register int i;
163
164	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
165	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
166	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
167	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
168	TUNABLE_INT_FETCH("kern.ipc.msgmnb", &msginfo.msgmnb);
169	TUNABLE_INT_FETCH("kern.ipc.msgtql", &msginfo.msgtql);
170
171	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
172	if (msgpool == NULL)
173		panic("msgpool is NULL");
174	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
175	if (msgmaps == NULL)
176		panic("msgmaps is NULL");
177	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
178	if (msghdrs == NULL)
179		panic("msghdrs is NULL");
180	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
181	    M_WAITOK);
182	if (msqids == NULL)
183		panic("msqids is NULL");
184
185	/*
186	 * msginfo.msgssz should be a power of two for efficiency reasons.
187	 * It is also pretty silly if msginfo.msgssz is less than 8
188	 * or greater than about 256 so ...
189	 */
190
191	i = 8;
192	while (i < 1024 && i != msginfo.msgssz)
193		i <<= 1;
194    	if (i != msginfo.msgssz) {
195		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
196		    msginfo.msgssz));
197		panic("msginfo.msgssz not a small power of 2");
198	}
199
200	if (msginfo.msgseg > 32767) {
201		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
202		panic("msginfo.msgseg > 32767");
203	}
204
205	if (msgmaps == NULL)
206		panic("msgmaps is NULL");
207
208	for (i = 0; i < msginfo.msgseg; i++) {
209		if (i > 0)
210			msgmaps[i-1].next = i;
211		msgmaps[i].next = -1;	/* implies entry is available */
212	}
213	free_msgmaps = 0;
214	nfree_msgmaps = msginfo.msgseg;
215
216	if (msghdrs == NULL)
217		panic("msghdrs is NULL");
218
219	for (i = 0; i < msginfo.msgtql; i++) {
220		msghdrs[i].msg_type = 0;
221		if (i > 0)
222			msghdrs[i-1].msg_next = &msghdrs[i];
223		msghdrs[i].msg_next = NULL;
224#ifdef MAC
225		mac_init_sysv_msgmsg(&msghdrs[i]);
226#endif
227    	}
228	free_msghdrs = &msghdrs[0];
229
230	if (msqids == NULL)
231		panic("msqids is NULL");
232
233	for (i = 0; i < msginfo.msgmni; i++) {
234		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
235		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
236		msqids[i].u.msg_perm.mode = 0;
237#ifdef MAC
238		mac_init_sysv_msgqueue(&msqids[i]);
239#endif
240	}
241	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
242}
243
244static int
245msgunload()
246{
247	struct msqid_kernel *msqkptr;
248	int msqid;
249#ifdef MAC
250	int i;
251#endif
252
253	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
254		/*
255		 * Look for an unallocated and unlocked msqid_ds.
256		 * msqid_ds's can be locked by msgsnd or msgrcv while
257		 * they are copying the message in/out.  We can't
258		 * re-use the entry until they release it.
259		 */
260		msqkptr = &msqids[msqid];
261		if (msqkptr->u.msg_qbytes != 0 ||
262		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
263			break;
264	}
265	if (msqid != msginfo.msgmni)
266		return (EBUSY);
267
268#ifdef MAC
269	for (i = 0; i < msginfo.msgtql; i++)
270		mac_destroy_sysv_msgmsg(&msghdrs[i]);
271	for (msqid = 0; msqid < msginfo.msgmni; msqid++)
272		mac_destroy_sysv_msgqueue(&msqids[msqid]);
273#endif
274	free(msgpool, M_MSG);
275	free(msgmaps, M_MSG);
276	free(msghdrs, M_MSG);
277	free(msqids, M_MSG);
278	mtx_destroy(&msq_mtx);
279	return (0);
280}
281
282
283static int
284sysvmsg_modload(struct module *module, int cmd, void *arg)
285{
286	int error = 0;
287
288	switch (cmd) {
289	case MOD_LOAD:
290		msginit();
291		break;
292	case MOD_UNLOAD:
293		error = msgunload();
294		break;
295	case MOD_SHUTDOWN:
296		break;
297	default:
298		error = EINVAL;
299		break;
300	}
301	return (error);
302}
303
304static moduledata_t sysvmsg_mod = {
305	"sysvmsg",
306	&sysvmsg_modload,
307	NULL
308};
309
310SYSCALL_MODULE_HELPER(msgsys);
311SYSCALL_MODULE_HELPER(msgctl);
312SYSCALL_MODULE_HELPER(msgget);
313SYSCALL_MODULE_HELPER(msgsnd);
314SYSCALL_MODULE_HELPER(msgrcv);
315
316DECLARE_MODULE(sysvmsg, sysvmsg_mod,
317	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
318MODULE_VERSION(sysvmsg, 1);
319
320/*
321 * Entry point for all MSG calls
322 *
323 * MPSAFE
324 */
325int
326msgsys(td, uap)
327	struct thread *td;
328	/* XXX actually varargs. */
329	struct msgsys_args /* {
330		int	which;
331		int	a2;
332		int	a3;
333		int	a4;
334		int	a5;
335		int	a6;
336	} */ *uap;
337{
338	int error;
339
340	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
341		return (ENOSYS);
342	if (uap->which < 0 ||
343	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
344		return (EINVAL);
345	error = (*msgcalls[uap->which])(td, &uap->a2);
346	return (error);
347}
348
349static void
350msg_freehdr(msghdr)
351	struct msg *msghdr;
352{
353	while (msghdr->msg_ts > 0) {
354		short next;
355		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
356			panic("msghdr->msg_spot out of range");
357		next = msgmaps[msghdr->msg_spot].next;
358		msgmaps[msghdr->msg_spot].next = free_msgmaps;
359		free_msgmaps = msghdr->msg_spot;
360		nfree_msgmaps++;
361		msghdr->msg_spot = next;
362		if (msghdr->msg_ts >= msginfo.msgssz)
363			msghdr->msg_ts -= msginfo.msgssz;
364		else
365			msghdr->msg_ts = 0;
366	}
367	if (msghdr->msg_spot != -1)
368		panic("msghdr->msg_spot != -1");
369	msghdr->msg_next = free_msghdrs;
370	free_msghdrs = msghdr;
371#ifdef MAC
372	mac_cleanup_sysv_msgmsg(msghdr);
373#endif
374}
375
376#ifndef _SYS_SYSPROTO_H_
377struct msgctl_args {
378	int	msqid;
379	int	cmd;
380	struct	msqid_ds *buf;
381};
382#endif
383
384/*
385 * MPSAFE
386 */
387int
388msgctl(td, uap)
389	struct thread *td;
390	register struct msgctl_args *uap;
391{
392	int msqid = uap->msqid;
393	int cmd = uap->cmd;
394	struct msqid_ds msqbuf;
395	int error;
396
397	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, uap->buf));
398	if (cmd == IPC_SET &&
399	    (error = copyin(uap->buf, &msqbuf, sizeof(msqbuf))) != 0)
400		return (error);
401	error = kern_msgctl(td, msqid, cmd, &msqbuf);
402	if (cmd == IPC_STAT && error == 0)
403		error = copyout(&msqbuf, uap->buf, sizeof(struct msqid_ds));
404	return (error);
405}
406
407int
408kern_msgctl(td, msqid, cmd, msqbuf)
409	struct thread *td;
410	int msqid;
411	int cmd;
412	struct msqid_ds *msqbuf;
413{
414	int rval, error, msqix;
415	register struct msqid_kernel *msqkptr;
416
417	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
418		return (ENOSYS);
419
420	msqix = IPCID_TO_IX(msqid);
421
422	if (msqix < 0 || msqix >= msginfo.msgmni) {
423		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqix,
424		    msginfo.msgmni));
425		return (EINVAL);
426	}
427
428	msqkptr = &msqids[msqix];
429
430	mtx_lock(&msq_mtx);
431	if (msqkptr->u.msg_qbytes == 0) {
432		DPRINTF(("no such msqid\n"));
433		error = EINVAL;
434		goto done2;
435	}
436	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(msqid)) {
437		DPRINTF(("wrong sequence number\n"));
438		error = EINVAL;
439		goto done2;
440	}
441#ifdef MAC
442	error = mac_check_sysv_msqctl(td->td_ucred, msqkptr, cmd);
443	if (error != 0)
444		goto done2;
445#endif
446
447	error = 0;
448	rval = 0;
449
450	switch (cmd) {
451
452	case IPC_RMID:
453	{
454		struct msg *msghdr;
455		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
456			goto done2;
457
458#ifdef MAC
459		/*
460		 * Check that the thread has MAC access permissions to
461		 * individual msghdrs.  Note: We need to do this in a
462		 * separate loop because the actual loop alters the
463		 * msq/msghdr info as it progresses, and there is no going
464		 * back if half the way through we discover that the
465		 * thread cannot free a certain msghdr.  The msq will get
466		 * into an inconsistent state.
467		 */
468		for (msghdr = msqkptr->u.msg_first; msghdr != NULL;
469		    msghdr = msghdr->msg_next) {
470			error = mac_check_sysv_msgrmid(td->td_ucred, msghdr);
471			if (error != 0)
472				goto done2;
473		}
474#endif
475
476		/* Free the message headers */
477		msghdr = msqkptr->u.msg_first;
478		while (msghdr != NULL) {
479			struct msg *msghdr_tmp;
480
481			/* Free the segments of each message */
482			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
483			msqkptr->u.msg_qnum--;
484			msghdr_tmp = msghdr;
485			msghdr = msghdr->msg_next;
486			msg_freehdr(msghdr_tmp);
487		}
488
489		if (msqkptr->u.msg_cbytes != 0)
490			panic("msg_cbytes is screwed up");
491		if (msqkptr->u.msg_qnum != 0)
492			panic("msg_qnum is screwed up");
493
494		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
495
496#ifdef MAC
497		mac_cleanup_sysv_msgqueue(msqkptr);
498#endif
499
500		wakeup(msqkptr);
501	}
502
503		break;
504
505	case IPC_SET:
506		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
507			goto done2;
508		if (msqbuf->msg_qbytes > msqkptr->u.msg_qbytes) {
509			error = suser(td);
510			if (error)
511				goto done2;
512		}
513		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
514			DPRINTF(("can't increase msg_qbytes beyond %d"
515			    "(truncating)\n", msginfo.msgmnb));
516			msqbuf->msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
517		}
518		if (msqbuf->msg_qbytes == 0) {
519			DPRINTF(("can't reduce msg_qbytes to 0\n"));
520			error = EINVAL;		/* non-standard errno! */
521			goto done2;
522		}
523		msqkptr->u.msg_perm.uid = msqbuf->msg_perm.uid;	/* change the owner */
524		msqkptr->u.msg_perm.gid = msqbuf->msg_perm.gid;	/* change the owner */
525		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
526		    (msqbuf->msg_perm.mode & 0777);
527		msqkptr->u.msg_qbytes = msqbuf->msg_qbytes;
528		msqkptr->u.msg_ctime = time_second;
529		break;
530
531	case IPC_STAT:
532		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
533			DPRINTF(("requester doesn't have read access\n"));
534			goto done2;
535		}
536		*msqbuf = msqkptr->u;
537		break;
538
539	default:
540		DPRINTF(("invalid command %d\n", cmd));
541		error = EINVAL;
542		goto done2;
543	}
544
545	if (error == 0)
546		td->td_retval[0] = rval;
547done2:
548	mtx_unlock(&msq_mtx);
549	return (error);
550}
551
552#ifndef _SYS_SYSPROTO_H_
553struct msgget_args {
554	key_t	key;
555	int	msgflg;
556};
557#endif
558
559/*
560 * MPSAFE
561 */
562int
563msgget(td, uap)
564	struct thread *td;
565	register struct msgget_args *uap;
566{
567	int msqid, error = 0;
568	int key = uap->key;
569	int msgflg = uap->msgflg;
570	struct ucred *cred = td->td_ucred;
571	register struct msqid_kernel *msqkptr = NULL;
572
573	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
574
575	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
576		return (ENOSYS);
577
578	mtx_lock(&msq_mtx);
579	if (key != IPC_PRIVATE) {
580		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
581			msqkptr = &msqids[msqid];
582			if (msqkptr->u.msg_qbytes != 0 &&
583			    msqkptr->u.msg_perm.key == key)
584				break;
585		}
586		if (msqid < msginfo.msgmni) {
587			DPRINTF(("found public key\n"));
588			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
589				DPRINTF(("not exclusive\n"));
590				error = EEXIST;
591				goto done2;
592			}
593			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
594			    msgflg & 0700))) {
595				DPRINTF(("requester doesn't have 0%o access\n",
596				    msgflg & 0700));
597				goto done2;
598			}
599#ifdef MAC
600			error = mac_check_sysv_msqget(cred, msqkptr);
601			if (error != 0)
602				goto done2;
603#endif
604			goto found;
605		}
606	}
607
608	DPRINTF(("need to allocate the msqid_ds\n"));
609	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
610		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
611			/*
612			 * Look for an unallocated and unlocked msqid_ds.
613			 * msqid_ds's can be locked by msgsnd or msgrcv while
614			 * they are copying the message in/out.  We can't
615			 * re-use the entry until they release it.
616			 */
617			msqkptr = &msqids[msqid];
618			if (msqkptr->u.msg_qbytes == 0 &&
619			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
620				break;
621		}
622		if (msqid == msginfo.msgmni) {
623			DPRINTF(("no more msqid_ds's available\n"));
624			error = ENOSPC;
625			goto done2;
626		}
627		DPRINTF(("msqid %d is available\n", msqid));
628		msqkptr->u.msg_perm.key = key;
629		msqkptr->u.msg_perm.cuid = cred->cr_uid;
630		msqkptr->u.msg_perm.uid = cred->cr_uid;
631		msqkptr->u.msg_perm.cgid = cred->cr_gid;
632		msqkptr->u.msg_perm.gid = cred->cr_gid;
633		msqkptr->u.msg_perm.mode = (msgflg & 0777);
634		/* Make sure that the returned msqid is unique */
635		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
636		msqkptr->u.msg_first = NULL;
637		msqkptr->u.msg_last = NULL;
638		msqkptr->u.msg_cbytes = 0;
639		msqkptr->u.msg_qnum = 0;
640		msqkptr->u.msg_qbytes = msginfo.msgmnb;
641		msqkptr->u.msg_lspid = 0;
642		msqkptr->u.msg_lrpid = 0;
643		msqkptr->u.msg_stime = 0;
644		msqkptr->u.msg_rtime = 0;
645		msqkptr->u.msg_ctime = time_second;
646#ifdef MAC
647		mac_create_sysv_msgqueue(cred, msqkptr);
648#endif
649	} else {
650		DPRINTF(("didn't find it and wasn't asked to create it\n"));
651		error = ENOENT;
652		goto done2;
653	}
654
655found:
656	/* Construct the unique msqid */
657	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
658done2:
659	mtx_unlock(&msq_mtx);
660	return (error);
661}
662
663#ifndef _SYS_SYSPROTO_H_
664struct msgsnd_args {
665	int	msqid;
666	const void	*msgp;
667	size_t	msgsz;
668	int	msgflg;
669};
670#endif
671
672/*
673 * MPSAFE
674 */
675int
676msgsnd(td, uap)
677	struct thread *td;
678	register struct msgsnd_args *uap;
679{
680	int msqid = uap->msqid;
681	const void *user_msgp = uap->msgp;
682	size_t msgsz = uap->msgsz;
683	int msgflg = uap->msgflg;
684	int segs_needed, error = 0;
685	register struct msqid_kernel *msqkptr;
686	register struct msg *msghdr;
687	short next;
688
689	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
690	    msgflg));
691	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
692		return (ENOSYS);
693
694	mtx_lock(&msq_mtx);
695	msqid = IPCID_TO_IX(msqid);
696
697	if (msqid < 0 || msqid >= msginfo.msgmni) {
698		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
699		    msginfo.msgmni));
700		error = EINVAL;
701		goto done2;
702	}
703
704	msqkptr = &msqids[msqid];
705	if (msqkptr->u.msg_qbytes == 0) {
706		DPRINTF(("no such message queue id\n"));
707		error = EINVAL;
708		goto done2;
709	}
710	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
711		DPRINTF(("wrong sequence number\n"));
712		error = EINVAL;
713		goto done2;
714	}
715
716	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
717		DPRINTF(("requester doesn't have write access\n"));
718		goto done2;
719	}
720
721#ifdef MAC
722	error = mac_check_sysv_msqsnd(td->td_ucred, msqkptr);
723	if (error != 0)
724		goto done2;
725#endif
726
727	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
728	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
729	    segs_needed));
730	for (;;) {
731		int need_more_resources = 0;
732
733		/*
734		 * check msgsz
735		 * (inside this loop in case msg_qbytes changes while we sleep)
736		 */
737
738		if (msgsz > msqkptr->u.msg_qbytes) {
739			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
740			error = EINVAL;
741			goto done2;
742		}
743
744		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
745			DPRINTF(("msqid is locked\n"));
746			need_more_resources = 1;
747		}
748		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
749			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
750			need_more_resources = 1;
751		}
752		if (segs_needed > nfree_msgmaps) {
753			DPRINTF(("segs_needed > nfree_msgmaps\n"));
754			need_more_resources = 1;
755		}
756		if (free_msghdrs == NULL) {
757			DPRINTF(("no more msghdrs\n"));
758			need_more_resources = 1;
759		}
760
761		if (need_more_resources) {
762			int we_own_it;
763
764			if ((msgflg & IPC_NOWAIT) != 0) {
765				DPRINTF(("need more resources but caller "
766				    "doesn't want to wait\n"));
767				error = EAGAIN;
768				goto done2;
769			}
770
771			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
772				DPRINTF(("we don't own the msqid_ds\n"));
773				we_own_it = 0;
774			} else {
775				/* Force later arrivals to wait for our
776				   request */
777				DPRINTF(("we own the msqid_ds\n"));
778				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
779				we_own_it = 1;
780			}
781			DPRINTF(("goodnight\n"));
782			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
783			    "msgwait", 0);
784			DPRINTF(("good morning, error=%d\n", error));
785			if (we_own_it)
786				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
787			if (error != 0) {
788				DPRINTF(("msgsnd:  interrupted system call\n"));
789				error = EINTR;
790				goto done2;
791			}
792
793			/*
794			 * Make sure that the msq queue still exists
795			 */
796
797			if (msqkptr->u.msg_qbytes == 0) {
798				DPRINTF(("msqid deleted\n"));
799				error = EIDRM;
800				goto done2;
801			}
802
803		} else {
804			DPRINTF(("got all the resources that we need\n"));
805			break;
806		}
807	}
808
809	/*
810	 * We have the resources that we need.
811	 * Make sure!
812	 */
813
814	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
815		panic("msg_perm.mode & MSG_LOCKED");
816	if (segs_needed > nfree_msgmaps)
817		panic("segs_needed > nfree_msgmaps");
818	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
819		panic("msgsz + msg_cbytes > msg_qbytes");
820	if (free_msghdrs == NULL)
821		panic("no more msghdrs");
822
823	/*
824	 * Re-lock the msqid_ds in case we page-fault when copying in the
825	 * message
826	 */
827
828	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
829		panic("msqid_ds is already locked");
830	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
831
832	/*
833	 * Allocate a message header
834	 */
835
836	msghdr = free_msghdrs;
837	free_msghdrs = msghdr->msg_next;
838	msghdr->msg_spot = -1;
839	msghdr->msg_ts = msgsz;
840#ifdef MAC
841	/*
842	 * XXXMAC: Should the mac_check_sysv_msgmsq check follow here
843	 * immediately?  Or, should it be checked just before the msg is
844	 * enqueued in the msgq (as it is done now)?
845	 */
846	mac_create_sysv_msgmsg(td->td_ucred, msqkptr, msghdr);
847#endif
848
849	/*
850	 * Allocate space for the message
851	 */
852
853	while (segs_needed > 0) {
854		if (nfree_msgmaps <= 0)
855			panic("not enough msgmaps");
856		if (free_msgmaps == -1)
857			panic("nil free_msgmaps");
858		next = free_msgmaps;
859		if (next <= -1)
860			panic("next too low #1");
861		if (next >= msginfo.msgseg)
862			panic("next out of range #1");
863		DPRINTF(("allocating segment %d to message\n", next));
864		free_msgmaps = msgmaps[next].next;
865		nfree_msgmaps--;
866		msgmaps[next].next = msghdr->msg_spot;
867		msghdr->msg_spot = next;
868		segs_needed--;
869	}
870
871	/*
872	 * Copy in the message type
873	 */
874
875	mtx_unlock(&msq_mtx);
876	if ((error = copyin(user_msgp, &msghdr->msg_type,
877	    sizeof(msghdr->msg_type))) != 0) {
878		mtx_lock(&msq_mtx);
879		DPRINTF(("error %d copying the message type\n", error));
880		msg_freehdr(msghdr);
881		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
882		wakeup(msqkptr);
883		goto done2;
884	}
885	mtx_lock(&msq_mtx);
886	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
887
888	/*
889	 * Validate the message type
890	 */
891
892	if (msghdr->msg_type < 1) {
893		msg_freehdr(msghdr);
894		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
895		wakeup(msqkptr);
896		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
897		error = EINVAL;
898		goto done2;
899	}
900
901	/*
902	 * Copy in the message body
903	 */
904
905	next = msghdr->msg_spot;
906	while (msgsz > 0) {
907		size_t tlen;
908		if (msgsz > msginfo.msgssz)
909			tlen = msginfo.msgssz;
910		else
911			tlen = msgsz;
912		if (next <= -1)
913			panic("next too low #2");
914		if (next >= msginfo.msgseg)
915			panic("next out of range #2");
916		mtx_unlock(&msq_mtx);
917		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
918		    tlen)) != 0) {
919			mtx_lock(&msq_mtx);
920			DPRINTF(("error %d copying in message segment\n",
921			    error));
922			msg_freehdr(msghdr);
923			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
924			wakeup(msqkptr);
925			goto done2;
926		}
927		mtx_lock(&msq_mtx);
928		msgsz -= tlen;
929		user_msgp = (const char *)user_msgp + tlen;
930		next = msgmaps[next].next;
931	}
932	if (next != -1)
933		panic("didn't use all the msg segments");
934
935	/*
936	 * We've got the message.  Unlock the msqid_ds.
937	 */
938
939	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
940
941	/*
942	 * Make sure that the msqid_ds is still allocated.
943	 */
944
945	if (msqkptr->u.msg_qbytes == 0) {
946		msg_freehdr(msghdr);
947		wakeup(msqkptr);
948		error = EIDRM;
949		goto done2;
950	}
951
952#ifdef MAC
953	/*
954	 * Note: Since the task/thread allocates the msghdr and usually
955	 * primes it with its own MAC label, for a majority of policies, it
956	 * won't be necessary to check whether the msghdr has access
957	 * permissions to the msgq.  The mac_check_sysv_msqsnd check would
958	 * suffice in that case.  However, this hook may be required where
959	 * individual policies derive a non-identical label for the msghdr
960	 * from the current thread label and may want to check the msghdr
961	 * enqueue permissions, along with read/write permissions to the
962	 * msgq.
963	 */
964	error = mac_check_sysv_msgmsq(td->td_ucred, msghdr, msqkptr);
965	if (error != 0) {
966		msg_freehdr(msghdr);
967		wakeup(msqkptr);
968		goto done2;
969	}
970#endif
971
972	/*
973	 * Put the message into the queue
974	 */
975	if (msqkptr->u.msg_first == NULL) {
976		msqkptr->u.msg_first = msghdr;
977		msqkptr->u.msg_last = msghdr;
978	} else {
979		msqkptr->u.msg_last->msg_next = msghdr;
980		msqkptr->u.msg_last = msghdr;
981	}
982	msqkptr->u.msg_last->msg_next = NULL;
983
984	msqkptr->u.msg_cbytes += msghdr->msg_ts;
985	msqkptr->u.msg_qnum++;
986	msqkptr->u.msg_lspid = td->td_proc->p_pid;
987	msqkptr->u.msg_stime = time_second;
988
989	wakeup(msqkptr);
990	td->td_retval[0] = 0;
991done2:
992	mtx_unlock(&msq_mtx);
993	return (error);
994}
995
996#ifndef _SYS_SYSPROTO_H_
997struct msgrcv_args {
998	int	msqid;
999	void	*msgp;
1000	size_t	msgsz;
1001	long	msgtyp;
1002	int	msgflg;
1003};
1004#endif
1005
1006/*
1007 * MPSAFE
1008 */
1009int
1010msgrcv(td, uap)
1011	struct thread *td;
1012	register struct msgrcv_args *uap;
1013{
1014	int msqid = uap->msqid;
1015	void *user_msgp = uap->msgp;
1016	size_t msgsz = uap->msgsz;
1017	long msgtyp = uap->msgtyp;
1018	int msgflg = uap->msgflg;
1019	size_t len;
1020	register struct msqid_kernel *msqkptr;
1021	register struct msg *msghdr;
1022	int error = 0;
1023	short next;
1024
1025	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
1026	    msgsz, msgtyp, msgflg));
1027
1028	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
1029		return (ENOSYS);
1030
1031	msqid = IPCID_TO_IX(msqid);
1032
1033	if (msqid < 0 || msqid >= msginfo.msgmni) {
1034		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
1035		    msginfo.msgmni));
1036		return (EINVAL);
1037	}
1038
1039	msqkptr = &msqids[msqid];
1040	mtx_lock(&msq_mtx);
1041	if (msqkptr->u.msg_qbytes == 0) {
1042		DPRINTF(("no such message queue id\n"));
1043		error = EINVAL;
1044		goto done2;
1045	}
1046	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1047		DPRINTF(("wrong sequence number\n"));
1048		error = EINVAL;
1049		goto done2;
1050	}
1051
1052	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
1053		DPRINTF(("requester doesn't have read access\n"));
1054		goto done2;
1055	}
1056
1057#ifdef MAC
1058	error = mac_check_sysv_msqrcv(td->td_ucred, msqkptr);
1059	if (error != 0)
1060		goto done2;
1061#endif
1062
1063	msghdr = NULL;
1064	while (msghdr == NULL) {
1065		if (msgtyp == 0) {
1066			msghdr = msqkptr->u.msg_first;
1067			if (msghdr != NULL) {
1068				if (msgsz < msghdr->msg_ts &&
1069				    (msgflg & MSG_NOERROR) == 0) {
1070					DPRINTF(("first message on the queue "
1071					    "is too big (want %d, got %d)\n",
1072					    msgsz, msghdr->msg_ts));
1073					error = E2BIG;
1074					goto done2;
1075				}
1076#ifdef MAC
1077				error = mac_check_sysv_msgrcv(td->td_ucred,
1078				    msghdr);
1079				if (error != 0)
1080					goto done2;
1081#endif
1082				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
1083					msqkptr->u.msg_first = NULL;
1084					msqkptr->u.msg_last = NULL;
1085				} else {
1086					msqkptr->u.msg_first = msghdr->msg_next;
1087					if (msqkptr->u.msg_first == NULL)
1088						panic("msg_first/last screwed up #1");
1089				}
1090			}
1091		} else {
1092			struct msg *previous;
1093			struct msg **prev;
1094
1095			previous = NULL;
1096			prev = &(msqkptr->u.msg_first);
1097			while ((msghdr = *prev) != NULL) {
1098				/*
1099				 * Is this message's type an exact match or is
1100				 * this message's type less than or equal to
1101				 * the absolute value of a negative msgtyp?
1102				 * Note that the second half of this test can
1103				 * NEVER be true if msgtyp is positive since
1104				 * msg_type is always positive!
1105				 */
1106
1107				if (msgtyp == msghdr->msg_type ||
1108				    msghdr->msg_type <= -msgtyp) {
1109					DPRINTF(("found message type %d, "
1110					    "requested %d\n",
1111					    msghdr->msg_type, msgtyp));
1112					if (msgsz < msghdr->msg_ts &&
1113					    (msgflg & MSG_NOERROR) == 0) {
1114						DPRINTF(("requested message "
1115						    "on the queue is too big "
1116						    "(want %d, got %d)\n",
1117						    msgsz, msghdr->msg_ts));
1118						error = E2BIG;
1119						goto done2;
1120					}
1121#ifdef MAC
1122					error = mac_check_sysv_msgrcv(
1123					    td->td_ucred, msghdr);
1124					if (error != 0)
1125						goto done2;
1126#endif
1127					*prev = msghdr->msg_next;
1128					if (msghdr == msqkptr->u.msg_last) {
1129						if (previous == NULL) {
1130							if (prev !=
1131							    &msqkptr->u.msg_first)
1132								panic("msg_first/last screwed up #2");
1133							msqkptr->u.msg_first =
1134							    NULL;
1135							msqkptr->u.msg_last =
1136							    NULL;
1137						} else {
1138							if (prev ==
1139							    &msqkptr->u.msg_first)
1140								panic("msg_first/last screwed up #3");
1141							msqkptr->u.msg_last =
1142							    previous;
1143						}
1144					}
1145					break;
1146				}
1147				previous = msghdr;
1148				prev = &(msghdr->msg_next);
1149			}
1150		}
1151
1152		/*
1153		 * We've either extracted the msghdr for the appropriate
1154		 * message or there isn't one.
1155		 * If there is one then bail out of this loop.
1156		 */
1157
1158		if (msghdr != NULL)
1159			break;
1160
1161		/*
1162		 * Hmph!  No message found.  Does the user want to wait?
1163		 */
1164
1165		if ((msgflg & IPC_NOWAIT) != 0) {
1166			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1167			    msgtyp));
1168			/* The SVID says to return ENOMSG. */
1169			error = ENOMSG;
1170			goto done2;
1171		}
1172
1173		/*
1174		 * Wait for something to happen
1175		 */
1176
1177		DPRINTF(("msgrcv:  goodnight\n"));
1178		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1179		    "msgwait", 0);
1180		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1181
1182		if (error != 0) {
1183			DPRINTF(("msgsnd:  interrupted system call\n"));
1184			error = EINTR;
1185			goto done2;
1186		}
1187
1188		/*
1189		 * Make sure that the msq queue still exists
1190		 */
1191
1192		if (msqkptr->u.msg_qbytes == 0 ||
1193		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1194			DPRINTF(("msqid deleted\n"));
1195			error = EIDRM;
1196			goto done2;
1197		}
1198	}
1199
1200	/*
1201	 * Return the message to the user.
1202	 *
1203	 * First, do the bookkeeping (before we risk being interrupted).
1204	 */
1205
1206	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1207	msqkptr->u.msg_qnum--;
1208	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1209	msqkptr->u.msg_rtime = time_second;
1210
1211	/*
1212	 * Make msgsz the actual amount that we'll be returning.
1213	 * Note that this effectively truncates the message if it is too long
1214	 * (since msgsz is never increased).
1215	 */
1216
1217	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1218	    msghdr->msg_ts));
1219	if (msgsz > msghdr->msg_ts)
1220		msgsz = msghdr->msg_ts;
1221
1222	/*
1223	 * Return the type to the user.
1224	 */
1225
1226	mtx_unlock(&msq_mtx);
1227	error = copyout(&(msghdr->msg_type), user_msgp,
1228	    sizeof(msghdr->msg_type));
1229	mtx_lock(&msq_mtx);
1230	if (error != 0) {
1231		DPRINTF(("error (%d) copying out message type\n", error));
1232		msg_freehdr(msghdr);
1233		wakeup(msqkptr);
1234		goto done2;
1235	}
1236	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1237
1238	/*
1239	 * Return the segments to the user
1240	 */
1241
1242	next = msghdr->msg_spot;
1243	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1244		size_t tlen;
1245
1246		if (msgsz - len > msginfo.msgssz)
1247			tlen = msginfo.msgssz;
1248		else
1249			tlen = msgsz - len;
1250		if (next <= -1)
1251			panic("next too low #3");
1252		if (next >= msginfo.msgseg)
1253			panic("next out of range #3");
1254		mtx_unlock(&msq_mtx);
1255		error = copyout(&msgpool[next * msginfo.msgssz],
1256		    user_msgp, tlen);
1257		mtx_lock(&msq_mtx);
1258		if (error != 0) {
1259			DPRINTF(("error (%d) copying out message segment\n",
1260			    error));
1261			msg_freehdr(msghdr);
1262			wakeup(msqkptr);
1263			goto done2;
1264		}
1265		user_msgp = (char *)user_msgp + tlen;
1266		next = msgmaps[next].next;
1267	}
1268
1269	/*
1270	 * Done, return the actual number of bytes copied out.
1271	 */
1272
1273	msg_freehdr(msghdr);
1274	wakeup(msqkptr);
1275	td->td_retval[0] = msgsz;
1276done2:
1277	mtx_unlock(&msq_mtx);
1278	return (error);
1279}
1280
1281static int
1282sysctl_msqids(SYSCTL_HANDLER_ARGS)
1283{
1284
1285	return (SYSCTL_OUT(req, msqids,
1286	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1287}
1288
1289SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
1290    "Maximum message size");
1291SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0,
1292    "Number of message queue identifiers");
1293SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RDTUN, &msginfo.msgmnb, 0,
1294    "Maximum number of bytes in a queue");
1295SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RDTUN, &msginfo.msgtql, 0,
1296    "Maximum number of messages in the system");
1297SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0,
1298    "Size of a message segment");
1299SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
1300    "Number of message segments");
1301SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1302    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1303