sysv_msg.c revision 129882
1/*
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19
20#include <sys/cdefs.h>
21__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 129882 2004-05-30 20:34:58Z phk $");
22
23#include "opt_sysvipc.h"
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/sysproto.h>
28#include <sys/kernel.h>
29#include <sys/proc.h>
30#include <sys/lock.h>
31#include <sys/mutex.h>
32#include <sys/module.h>
33#include <sys/msg.h>
34#include <sys/syscall.h>
35#include <sys/sysent.h>
36#include <sys/sysctl.h>
37#include <sys/malloc.h>
38#include <sys/jail.h>
39
40static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
41
42static void msginit(void);
43static int msgunload(void);
44static int sysvmsg_modload(struct module *, int, void *);
45
46#ifdef MSG_DEBUG
47#define DPRINTF(a)	printf a
48#else
49#define DPRINTF(a)
50#endif
51
52static void msg_freehdr(struct msg *msghdr);
53
54/* XXX casting to (sy_call_t *) is bogus, as usual. */
55static sy_call_t *msgcalls[] = {
56	(sy_call_t *)msgctl, (sy_call_t *)msgget,
57	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
58};
59
60struct msg {
61	struct	msg *msg_next;	/* next msg in the chain */
62	long	msg_type;	/* type of this message */
63    				/* >0 -> type of this message */
64    				/* 0 -> free header */
65	u_short	msg_ts;		/* size of this message */
66	short	msg_spot;	/* location of start of msg in buffer */
67};
68
69
/*
 * Compile-time defaults for the message limits.  Each one except MSGMAX
 * may be predefined by the build; MSGMAX is always derived from the
 * segment size and segment count.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif

#if !defined(MSGSEG)
#define MSGSEG	2048		/* must be less than 32767 */
#endif

#define MSGMAX	(MSGSSZ*MSGSEG)

#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif

#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif

#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
86
87/*
88 * Based on the configuration parameters described in an SVR2 (yes, two)
89 * config(1m) man page.
90 *
91 * Each message is broken up and stored in segments that are msgssz bytes
92 * long.  For efficiency reasons, this should be a power of two.  Also,
93 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
96 */
97struct msginfo msginfo = {
98                MSGMAX,         /* max chars in a message */
99                MSGMNI,         /* # of message queue identifiers */
100                MSGMNB,         /* max chars in a queue */
101                MSGTQL,         /* max messages in system */
102                MSGSSZ,         /* size of a message segment */
103                		/* (must be small power of 2 greater than 4) */
104                MSGSEG          /* number of message segments */
105};
106
107/*
108 * macros to convert between msqid_ds's and msqid's.
109 * (specific to this implementation)
110 */
/*
 * MSQID(ix, ds) packs a slot index (low 16 bits) and the sequence number
 * of msqid_ds "ds" (high 16 bits) into one identifier; MSQID_IX() and
 * MSQID_SEQ() take such an identifier apart again.
 *
 * The '&' and '|' operands are parenthesized explicitly rather than
 * relying on operator precedence, and the sequence number is widened to
 * unsigned before shifting so that values >= 0x8000 do not overflow a
 * signed int.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | \
    (((unsigned int)(ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
114
115/*
116 * The rest of this file is specific to this particular implementation.
117 */
118
/*
 * One entry per message segment.  Entries are chained by index, either
 * into the chain of segments belonging to one message or into the free
 * list headed by free_msgmaps.
 */
struct msgmap {
	/*
	 * Index (0..msgseg-1) of the next segment in the chain,
	 * or -1 when this is the last one.
	 */
	short	next;
};
124
125#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */
126
127static int nfree_msgmaps;	/* # of free map entries */
128static short free_msgmaps;	/* head of linked list of free map entries */
129static struct msg *free_msghdrs;/* list of free msg headers */
130static char *msgpool;		/* MSGMAX byte long msg buffer pool */
131static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
132static struct msg *msghdrs;	/* MSGTQL msg headers */
133static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */
134static struct mtx msq_mtx;	/* global mutex for message queues. */
135
136static void
137msginit()
138{
139	register int i;
140
141	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
142	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
143	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
144	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
145
146	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
147	if (msgpool == NULL)
148		panic("msgpool is NULL");
149	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
150	if (msgmaps == NULL)
151		panic("msgmaps is NULL");
152	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
153	if (msghdrs == NULL)
154		panic("msghdrs is NULL");
155	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
156	if (msqids == NULL)
157		panic("msqids is NULL");
158
159	/*
160	 * msginfo.msgssz should be a power of two for efficiency reasons.
161	 * It is also pretty silly if msginfo.msgssz is less than 8
162	 * or greater than about 256 so ...
163	 */
164
165	i = 8;
166	while (i < 1024 && i != msginfo.msgssz)
167		i <<= 1;
168    	if (i != msginfo.msgssz) {
169		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
170		    msginfo.msgssz));
171		panic("msginfo.msgssz not a small power of 2");
172	}
173
174	if (msginfo.msgseg > 32767) {
175		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
176		panic("msginfo.msgseg > 32767");
177	}
178
179	if (msgmaps == NULL)
180		panic("msgmaps is NULL");
181
182	for (i = 0; i < msginfo.msgseg; i++) {
183		if (i > 0)
184			msgmaps[i-1].next = i;
185		msgmaps[i].next = -1;	/* implies entry is available */
186	}
187	free_msgmaps = 0;
188	nfree_msgmaps = msginfo.msgseg;
189
190	if (msghdrs == NULL)
191		panic("msghdrs is NULL");
192
193	for (i = 0; i < msginfo.msgtql; i++) {
194		msghdrs[i].msg_type = 0;
195		if (i > 0)
196			msghdrs[i-1].msg_next = &msghdrs[i];
197		msghdrs[i].msg_next = NULL;
198    	}
199	free_msghdrs = &msghdrs[0];
200
201	if (msqids == NULL)
202		panic("msqids is NULL");
203
204	for (i = 0; i < msginfo.msgmni; i++) {
205		msqids[i].msg_qbytes = 0;	/* implies entry is available */
206		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
207		msqids[i].msg_perm.mode = 0;
208	}
209	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
210}
211
212static int
213msgunload()
214{
215	struct msqid_ds *msqptr;
216	int msqid;
217
218	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
219		/*
220		 * Look for an unallocated and unlocked msqid_ds.
221		 * msqid_ds's can be locked by msgsnd or msgrcv while
222		 * they are copying the message in/out.  We can't
223		 * re-use the entry until they release it.
224		 */
225		msqptr = &msqids[msqid];
226		if (msqptr->msg_qbytes != 0 ||
227		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
228			break;
229	}
230	if (msqid != msginfo.msgmni)
231		return (EBUSY);
232
233	free(msgpool, M_MSG);
234	free(msgmaps, M_MSG);
235	free(msghdrs, M_MSG);
236	free(msqids, M_MSG);
237	mtx_destroy(&msq_mtx);
238	return (0);
239}
240
241
242static int
243sysvmsg_modload(struct module *module, int cmd, void *arg)
244{
245	int error = 0;
246
247	switch (cmd) {
248	case MOD_LOAD:
249		msginit();
250		break;
251	case MOD_UNLOAD:
252		error = msgunload();
253		break;
254	case MOD_SHUTDOWN:
255		break;
256	default:
257		error = EINVAL;
258		break;
259	}
260	return (error);
261}
262
263static moduledata_t sysvmsg_mod = {
264	"sysvmsg",
265	&sysvmsg_modload,
266	NULL
267};
268
269SYSCALL_MODULE_HELPER(msgsys);
270SYSCALL_MODULE_HELPER(msgctl);
271SYSCALL_MODULE_HELPER(msgget);
272SYSCALL_MODULE_HELPER(msgsnd);
273SYSCALL_MODULE_HELPER(msgrcv);
274
275DECLARE_MODULE(sysvmsg, sysvmsg_mod,
276	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
277MODULE_VERSION(sysvmsg, 1);
278
279/*
280 * Entry point for all MSG calls
281 *
282 * MPSAFE
283 */
284int
285msgsys(td, uap)
286	struct thread *td;
287	/* XXX actually varargs. */
288	struct msgsys_args /* {
289		int	which;
290		int	a2;
291		int	a3;
292		int	a4;
293		int	a5;
294		int	a6;
295	} */ *uap;
296{
297	int error;
298
299	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
300		return (ENOSYS);
301	if (uap->which < 0 ||
302	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
303		return (EINVAL);
304	error = (*msgcalls[uap->which])(td, &uap->a2);
305	return (error);
306}
307
308static void
309msg_freehdr(msghdr)
310	struct msg *msghdr;
311{
312	while (msghdr->msg_ts > 0) {
313		short next;
314		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
315			panic("msghdr->msg_spot out of range");
316		next = msgmaps[msghdr->msg_spot].next;
317		msgmaps[msghdr->msg_spot].next = free_msgmaps;
318		free_msgmaps = msghdr->msg_spot;
319		nfree_msgmaps++;
320		msghdr->msg_spot = next;
321		if (msghdr->msg_ts >= msginfo.msgssz)
322			msghdr->msg_ts -= msginfo.msgssz;
323		else
324			msghdr->msg_ts = 0;
325	}
326	if (msghdr->msg_spot != -1)
327		panic("msghdr->msg_spot != -1");
328	msghdr->msg_next = free_msghdrs;
329	free_msghdrs = msghdr;
330}
331
/* Fallback argument layout for msgctl() when sysproto.h is not in use. */
#if !defined(_SYS_SYSPROTO_H_)
struct msgctl_args {
	int	msqid;			/* queue identifier */
	int	cmd;			/* IPC_RMID, IPC_SET or IPC_STAT */
	struct	msqid_ds *buf;		/* user buffer for IPC_SET/IPC_STAT */
};
#endif
339
340/*
341 * MPSAFE
342 */
343int
344msgctl(td, uap)
345	struct thread *td;
346	register struct msgctl_args *uap;
347{
348	int msqid = uap->msqid;
349	int cmd = uap->cmd;
350	struct msqid_ds *user_msqptr = uap->buf;
351	int rval, error;
352	struct msqid_ds msqbuf;
353	register struct msqid_ds *msqptr;
354
355	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
356	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
357		return (ENOSYS);
358
359	msqid = IPCID_TO_IX(msqid);
360
361	if (msqid < 0 || msqid >= msginfo.msgmni) {
362		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
363		    msginfo.msgmni));
364		return (EINVAL);
365	}
366	if (cmd == IPC_SET &&
367	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
368		return (error);
369
370	msqptr = &msqids[msqid];
371
372	mtx_lock(&msq_mtx);
373	if (msqptr->msg_qbytes == 0) {
374		DPRINTF(("no such msqid\n"));
375		error = EINVAL;
376		goto done2;
377	}
378	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
379		DPRINTF(("wrong sequence number\n"));
380		error = EINVAL;
381		goto done2;
382	}
383
384	error = 0;
385	rval = 0;
386
387	switch (cmd) {
388
389	case IPC_RMID:
390	{
391		struct msg *msghdr;
392		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
393			goto done2;
394		/* Free the message headers */
395		msghdr = msqptr->msg_first;
396		while (msghdr != NULL) {
397			struct msg *msghdr_tmp;
398
399			/* Free the segments of each message */
400			msqptr->msg_cbytes -= msghdr->msg_ts;
401			msqptr->msg_qnum--;
402			msghdr_tmp = msghdr;
403			msghdr = msghdr->msg_next;
404			msg_freehdr(msghdr_tmp);
405		}
406
407		if (msqptr->msg_cbytes != 0)
408			panic("msg_cbytes is screwed up");
409		if (msqptr->msg_qnum != 0)
410			panic("msg_qnum is screwed up");
411
412		msqptr->msg_qbytes = 0;	/* Mark it as free */
413
414		wakeup(msqptr);
415	}
416
417		break;
418
419	case IPC_SET:
420		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
421			goto done2;
422		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
423			error = suser(td);
424			if (error)
425				goto done2;
426		}
427		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
428			DPRINTF(("can't increase msg_qbytes beyond %d"
429			    "(truncating)\n", msginfo.msgmnb));
430			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
431		}
432		if (msqbuf.msg_qbytes == 0) {
433			DPRINTF(("can't reduce msg_qbytes to 0\n"));
434			error = EINVAL;		/* non-standard errno! */
435			goto done2;
436		}
437		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
438		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
439		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
440		    (msqbuf.msg_perm.mode & 0777);
441		msqptr->msg_qbytes = msqbuf.msg_qbytes;
442		msqptr->msg_ctime = time_second;
443		break;
444
445	case IPC_STAT:
446		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
447			DPRINTF(("requester doesn't have read access\n"));
448			goto done2;
449		}
450		break;
451
452	default:
453		DPRINTF(("invalid command %d\n", cmd));
454		error = EINVAL;
455		goto done2;
456	}
457
458	if (error == 0)
459		td->td_retval[0] = rval;
460done2:
461	mtx_unlock(&msq_mtx);
462	if (cmd == IPC_STAT && error == 0)
463		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
464	return(error);
465}
466
467#ifndef _SYS_SYSPROTO_H_
468struct msgget_args {
469	key_t	key;
470	int	msgflg;
471};
472#endif
473
474/*
475 * MPSAFE
476 */
477int
478msgget(td, uap)
479	struct thread *td;
480	register struct msgget_args *uap;
481{
482	int msqid, error = 0;
483	int key = uap->key;
484	int msgflg = uap->msgflg;
485	struct ucred *cred = td->td_ucred;
486	register struct msqid_ds *msqptr = NULL;
487
488	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
489
490	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
491		return (ENOSYS);
492
493	mtx_lock(&msq_mtx);
494	if (key != IPC_PRIVATE) {
495		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
496			msqptr = &msqids[msqid];
497			if (msqptr->msg_qbytes != 0 &&
498			    msqptr->msg_perm.key == key)
499				break;
500		}
501		if (msqid < msginfo.msgmni) {
502			DPRINTF(("found public key\n"));
503			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
504				DPRINTF(("not exclusive\n"));
505				error = EEXIST;
506				goto done2;
507			}
508			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700))) {
509				DPRINTF(("requester doesn't have 0%o access\n",
510				    msgflg & 0700));
511				goto done2;
512			}
513			goto found;
514		}
515	}
516
517	DPRINTF(("need to allocate the msqid_ds\n"));
518	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
519		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
520			/*
521			 * Look for an unallocated and unlocked msqid_ds.
522			 * msqid_ds's can be locked by msgsnd or msgrcv while
523			 * they are copying the message in/out.  We can't
524			 * re-use the entry until they release it.
525			 */
526			msqptr = &msqids[msqid];
527			if (msqptr->msg_qbytes == 0 &&
528			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
529				break;
530		}
531		if (msqid == msginfo.msgmni) {
532			DPRINTF(("no more msqid_ds's available\n"));
533			error = ENOSPC;
534			goto done2;
535		}
536		DPRINTF(("msqid %d is available\n", msqid));
537		msqptr->msg_perm.key = key;
538		msqptr->msg_perm.cuid = cred->cr_uid;
539		msqptr->msg_perm.uid = cred->cr_uid;
540		msqptr->msg_perm.cgid = cred->cr_gid;
541		msqptr->msg_perm.gid = cred->cr_gid;
542		msqptr->msg_perm.mode = (msgflg & 0777);
543		/* Make sure that the returned msqid is unique */
544		msqptr->msg_perm.seq = (msqptr->msg_perm.seq + 1) & 0x7fff;
545		msqptr->msg_first = NULL;
546		msqptr->msg_last = NULL;
547		msqptr->msg_cbytes = 0;
548		msqptr->msg_qnum = 0;
549		msqptr->msg_qbytes = msginfo.msgmnb;
550		msqptr->msg_lspid = 0;
551		msqptr->msg_lrpid = 0;
552		msqptr->msg_stime = 0;
553		msqptr->msg_rtime = 0;
554		msqptr->msg_ctime = time_second;
555	} else {
556		DPRINTF(("didn't find it and wasn't asked to create it\n"));
557		error = ENOENT;
558		goto done2;
559	}
560
561found:
562	/* Construct the unique msqid */
563	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
564done2:
565	mtx_unlock(&msq_mtx);
566	return (error);
567}
568
569#ifndef _SYS_SYSPROTO_H_
570struct msgsnd_args {
571	int	msqid;
572	const void	*msgp;
573	size_t	msgsz;
574	int	msgflg;
575};
576#endif
577
578/*
579 * MPSAFE
580 */
581int
582msgsnd(td, uap)
583	struct thread *td;
584	register struct msgsnd_args *uap;
585{
586	int msqid = uap->msqid;
587	const void *user_msgp = uap->msgp;
588	size_t msgsz = uap->msgsz;
589	int msgflg = uap->msgflg;
590	int segs_needed, error = 0;
591	register struct msqid_ds *msqptr;
592	register struct msg *msghdr;
593	short next;
594
595	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
596	    msgflg));
597	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
598		return (ENOSYS);
599
600	mtx_lock(&msq_mtx);
601	msqid = IPCID_TO_IX(msqid);
602
603	if (msqid < 0 || msqid >= msginfo.msgmni) {
604		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
605		    msginfo.msgmni));
606		error = EINVAL;
607		goto done2;
608	}
609
610	msqptr = &msqids[msqid];
611	if (msqptr->msg_qbytes == 0) {
612		DPRINTF(("no such message queue id\n"));
613		error = EINVAL;
614		goto done2;
615	}
616	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
617		DPRINTF(("wrong sequence number\n"));
618		error = EINVAL;
619		goto done2;
620	}
621
622	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
623		DPRINTF(("requester doesn't have write access\n"));
624		goto done2;
625	}
626
627	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
628	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
629	    segs_needed));
630	for (;;) {
631		int need_more_resources = 0;
632
633		/*
634		 * check msgsz
635		 * (inside this loop in case msg_qbytes changes while we sleep)
636		 */
637
638		if (msgsz > msqptr->msg_qbytes) {
639			DPRINTF(("msgsz > msqptr->msg_qbytes\n"));
640			error = EINVAL;
641			goto done2;
642		}
643
644		if (msqptr->msg_perm.mode & MSG_LOCKED) {
645			DPRINTF(("msqid is locked\n"));
646			need_more_resources = 1;
647		}
648		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
649			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
650			need_more_resources = 1;
651		}
652		if (segs_needed > nfree_msgmaps) {
653			DPRINTF(("segs_needed > nfree_msgmaps\n"));
654			need_more_resources = 1;
655		}
656		if (free_msghdrs == NULL) {
657			DPRINTF(("no more msghdrs\n"));
658			need_more_resources = 1;
659		}
660
661		if (need_more_resources) {
662			int we_own_it;
663
664			if ((msgflg & IPC_NOWAIT) != 0) {
665				DPRINTF(("need more resources but caller "
666				    "doesn't want to wait\n"));
667				error = EAGAIN;
668				goto done2;
669			}
670
671			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
672				DPRINTF(("we don't own the msqid_ds\n"));
673				we_own_it = 0;
674			} else {
675				/* Force later arrivals to wait for our
676				   request */
677				DPRINTF(("we own the msqid_ds\n"));
678				msqptr->msg_perm.mode |= MSG_LOCKED;
679				we_own_it = 1;
680			}
681			DPRINTF(("goodnight\n"));
682			error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
683			    "msgwait", 0);
684			DPRINTF(("good morning, error=%d\n", error));
685			if (we_own_it)
686				msqptr->msg_perm.mode &= ~MSG_LOCKED;
687			if (error != 0) {
688				DPRINTF(("msgsnd:  interrupted system call\n"));
689				error = EINTR;
690				goto done2;
691			}
692
693			/*
694			 * Make sure that the msq queue still exists
695			 */
696
697			if (msqptr->msg_qbytes == 0) {
698				DPRINTF(("msqid deleted\n"));
699				error = EIDRM;
700				goto done2;
701			}
702
703		} else {
704			DPRINTF(("got all the resources that we need\n"));
705			break;
706		}
707	}
708
709	/*
710	 * We have the resources that we need.
711	 * Make sure!
712	 */
713
714	if (msqptr->msg_perm.mode & MSG_LOCKED)
715		panic("msg_perm.mode & MSG_LOCKED");
716	if (segs_needed > nfree_msgmaps)
717		panic("segs_needed > nfree_msgmaps");
718	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
719		panic("msgsz + msg_cbytes > msg_qbytes");
720	if (free_msghdrs == NULL)
721		panic("no more msghdrs");
722
723	/*
724	 * Re-lock the msqid_ds in case we page-fault when copying in the
725	 * message
726	 */
727
728	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
729		panic("msqid_ds is already locked");
730	msqptr->msg_perm.mode |= MSG_LOCKED;
731
732	/*
733	 * Allocate a message header
734	 */
735
736	msghdr = free_msghdrs;
737	free_msghdrs = msghdr->msg_next;
738	msghdr->msg_spot = -1;
739	msghdr->msg_ts = msgsz;
740
741	/*
742	 * Allocate space for the message
743	 */
744
745	while (segs_needed > 0) {
746		if (nfree_msgmaps <= 0)
747			panic("not enough msgmaps");
748		if (free_msgmaps == -1)
749			panic("nil free_msgmaps");
750		next = free_msgmaps;
751		if (next <= -1)
752			panic("next too low #1");
753		if (next >= msginfo.msgseg)
754			panic("next out of range #1");
755		DPRINTF(("allocating segment %d to message\n", next));
756		free_msgmaps = msgmaps[next].next;
757		nfree_msgmaps--;
758		msgmaps[next].next = msghdr->msg_spot;
759		msghdr->msg_spot = next;
760		segs_needed--;
761	}
762
763	/*
764	 * Copy in the message type
765	 */
766
767	mtx_unlock(&msq_mtx);
768	if ((error = copyin(user_msgp, &msghdr->msg_type,
769	    sizeof(msghdr->msg_type))) != 0) {
770		mtx_lock(&msq_mtx);
771		DPRINTF(("error %d copying the message type\n", error));
772		msg_freehdr(msghdr);
773		msqptr->msg_perm.mode &= ~MSG_LOCKED;
774		wakeup(msqptr);
775		goto done2;
776	}
777	mtx_lock(&msq_mtx);
778	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
779
780	/*
781	 * Validate the message type
782	 */
783
784	if (msghdr->msg_type < 1) {
785		msg_freehdr(msghdr);
786		msqptr->msg_perm.mode &= ~MSG_LOCKED;
787		wakeup(msqptr);
788		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
789		error = EINVAL;
790		goto done2;
791	}
792
793	/*
794	 * Copy in the message body
795	 */
796
797	next = msghdr->msg_spot;
798	while (msgsz > 0) {
799		size_t tlen;
800		if (msgsz > msginfo.msgssz)
801			tlen = msginfo.msgssz;
802		else
803			tlen = msgsz;
804		if (next <= -1)
805			panic("next too low #2");
806		if (next >= msginfo.msgseg)
807			panic("next out of range #2");
808		mtx_unlock(&msq_mtx);
809		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
810		    tlen)) != 0) {
811			mtx_lock(&msq_mtx);
812			DPRINTF(("error %d copying in message segment\n",
813			    error));
814			msg_freehdr(msghdr);
815			msqptr->msg_perm.mode &= ~MSG_LOCKED;
816			wakeup(msqptr);
817			goto done2;
818		}
819		mtx_lock(&msq_mtx);
820		msgsz -= tlen;
821		user_msgp = (const char *)user_msgp + tlen;
822		next = msgmaps[next].next;
823	}
824	if (next != -1)
825		panic("didn't use all the msg segments");
826
827	/*
828	 * We've got the message.  Unlock the msqid_ds.
829	 */
830
831	msqptr->msg_perm.mode &= ~MSG_LOCKED;
832
833	/*
834	 * Make sure that the msqid_ds is still allocated.
835	 */
836
837	if (msqptr->msg_qbytes == 0) {
838		msg_freehdr(msghdr);
839		wakeup(msqptr);
840		error = EIDRM;
841		goto done2;
842	}
843
844	/*
845	 * Put the message into the queue
846	 */
847
848	if (msqptr->msg_first == NULL) {
849		msqptr->msg_first = msghdr;
850		msqptr->msg_last = msghdr;
851	} else {
852		msqptr->msg_last->msg_next = msghdr;
853		msqptr->msg_last = msghdr;
854	}
855	msqptr->msg_last->msg_next = NULL;
856
857	msqptr->msg_cbytes += msghdr->msg_ts;
858	msqptr->msg_qnum++;
859	msqptr->msg_lspid = td->td_proc->p_pid;
860	msqptr->msg_stime = time_second;
861
862	wakeup(msqptr);
863	td->td_retval[0] = 0;
864done2:
865	mtx_unlock(&msq_mtx);
866	return (error);
867}
868
869#ifndef _SYS_SYSPROTO_H_
870struct msgrcv_args {
871	int	msqid;
872	void	*msgp;
873	size_t	msgsz;
874	long	msgtyp;
875	int	msgflg;
876};
877#endif
878
879/*
880 * MPSAFE
881 */
882int
883msgrcv(td, uap)
884	struct thread *td;
885	register struct msgrcv_args *uap;
886{
887	int msqid = uap->msqid;
888	void *user_msgp = uap->msgp;
889	size_t msgsz = uap->msgsz;
890	long msgtyp = uap->msgtyp;
891	int msgflg = uap->msgflg;
892	size_t len;
893	register struct msqid_ds *msqptr;
894	register struct msg *msghdr;
895	int error = 0;
896	short next;
897
898	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
899	    msgsz, msgtyp, msgflg));
900
901	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
902		return (ENOSYS);
903
904	msqid = IPCID_TO_IX(msqid);
905
906	if (msqid < 0 || msqid >= msginfo.msgmni) {
907		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
908		    msginfo.msgmni));
909		return (EINVAL);
910	}
911
912	msqptr = &msqids[msqid];
913	mtx_lock(&msq_mtx);
914	if (msqptr->msg_qbytes == 0) {
915		DPRINTF(("no such message queue id\n"));
916		error = EINVAL;
917		goto done2;
918	}
919	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
920		DPRINTF(("wrong sequence number\n"));
921		error = EINVAL;
922		goto done2;
923	}
924
925	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
926		DPRINTF(("requester doesn't have read access\n"));
927		goto done2;
928	}
929
930	msghdr = NULL;
931	while (msghdr == NULL) {
932		if (msgtyp == 0) {
933			msghdr = msqptr->msg_first;
934			if (msghdr != NULL) {
935				if (msgsz < msghdr->msg_ts &&
936				    (msgflg & MSG_NOERROR) == 0) {
937					DPRINTF(("first message on the queue "
938					    "is too big (want %d, got %d)\n",
939					    msgsz, msghdr->msg_ts));
940					error = E2BIG;
941					goto done2;
942				}
943				if (msqptr->msg_first == msqptr->msg_last) {
944					msqptr->msg_first = NULL;
945					msqptr->msg_last = NULL;
946				} else {
947					msqptr->msg_first = msghdr->msg_next;
948					if (msqptr->msg_first == NULL)
949						panic("msg_first/last screwed up #1");
950				}
951			}
952		} else {
953			struct msg *previous;
954			struct msg **prev;
955
956			previous = NULL;
957			prev = &(msqptr->msg_first);
958			while ((msghdr = *prev) != NULL) {
959				/*
960				 * Is this message's type an exact match or is
961				 * this message's type less than or equal to
962				 * the absolute value of a negative msgtyp?
963				 * Note that the second half of this test can
964				 * NEVER be true if msgtyp is positive since
965				 * msg_type is always positive!
966				 */
967
968				if (msgtyp == msghdr->msg_type ||
969				    msghdr->msg_type <= -msgtyp) {
970					DPRINTF(("found message type %d, "
971					    "requested %d\n",
972					    msghdr->msg_type, msgtyp));
973					if (msgsz < msghdr->msg_ts &&
974					    (msgflg & MSG_NOERROR) == 0) {
975						DPRINTF(("requested message "
976						    "on the queue is too big "
977						    "(want %d, got %d)\n",
978						    msgsz, msghdr->msg_ts));
979						error = E2BIG;
980						goto done2;
981					}
982					*prev = msghdr->msg_next;
983					if (msghdr == msqptr->msg_last) {
984						if (previous == NULL) {
985							if (prev !=
986							    &msqptr->msg_first)
987								panic("msg_first/last screwed up #2");
988							msqptr->msg_first =
989							    NULL;
990							msqptr->msg_last =
991							    NULL;
992						} else {
993							if (prev ==
994							    &msqptr->msg_first)
995								panic("msg_first/last screwed up #3");
996							msqptr->msg_last =
997							    previous;
998						}
999					}
1000					break;
1001				}
1002				previous = msghdr;
1003				prev = &(msghdr->msg_next);
1004			}
1005		}
1006
1007		/*
1008		 * We've either extracted the msghdr for the appropriate
1009		 * message or there isn't one.
1010		 * If there is one then bail out of this loop.
1011		 */
1012
1013		if (msghdr != NULL)
1014			break;
1015
1016		/*
1017		 * Hmph!  No message found.  Does the user want to wait?
1018		 */
1019
1020		if ((msgflg & IPC_NOWAIT) != 0) {
1021			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1022			    msgtyp));
1023			/* The SVID says to return ENOMSG. */
1024			error = ENOMSG;
1025			goto done2;
1026		}
1027
1028		/*
1029		 * Wait for something to happen
1030		 */
1031
1032		DPRINTF(("msgrcv:  goodnight\n"));
1033		error = msleep(msqptr, &msq_mtx, (PZERO - 4) | PCATCH,
1034		    "msgwait", 0);
1035		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1036
1037		if (error != 0) {
1038			DPRINTF(("msgsnd:  interrupted system call\n"));
1039			error = EINTR;
1040			goto done2;
1041		}
1042
1043		/*
1044		 * Make sure that the msq queue still exists
1045		 */
1046
1047		if (msqptr->msg_qbytes == 0 ||
1048		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1049			DPRINTF(("msqid deleted\n"));
1050			error = EIDRM;
1051			goto done2;
1052		}
1053	}
1054
1055	/*
1056	 * Return the message to the user.
1057	 *
1058	 * First, do the bookkeeping (before we risk being interrupted).
1059	 */
1060
1061	msqptr->msg_cbytes -= msghdr->msg_ts;
1062	msqptr->msg_qnum--;
1063	msqptr->msg_lrpid = td->td_proc->p_pid;
1064	msqptr->msg_rtime = time_second;
1065
1066	/*
1067	 * Make msgsz the actual amount that we'll be returning.
1068	 * Note that this effectively truncates the message if it is too long
1069	 * (since msgsz is never increased).
1070	 */
1071
1072	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1073	    msghdr->msg_ts));
1074	if (msgsz > msghdr->msg_ts)
1075		msgsz = msghdr->msg_ts;
1076
1077	/*
1078	 * Return the type to the user.
1079	 */
1080
1081	mtx_unlock(&msq_mtx);
1082	error = copyout(&(msghdr->msg_type), user_msgp,
1083	    sizeof(msghdr->msg_type));
1084	mtx_lock(&msq_mtx);
1085	if (error != 0) {
1086		DPRINTF(("error (%d) copying out message type\n", error));
1087		msg_freehdr(msghdr);
1088		wakeup(msqptr);
1089		goto done2;
1090	}
1091	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1092
1093	/*
1094	 * Return the segments to the user
1095	 */
1096
1097	next = msghdr->msg_spot;
1098	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1099		size_t tlen;
1100
1101		if (msgsz - len > msginfo.msgssz)
1102			tlen = msginfo.msgssz;
1103		else
1104			tlen = msgsz - len;
1105		if (next <= -1)
1106			panic("next too low #3");
1107		if (next >= msginfo.msgseg)
1108			panic("next out of range #3");
1109		mtx_unlock(&msq_mtx);
1110		error = copyout(&msgpool[next * msginfo.msgssz],
1111		    user_msgp, tlen);
1112		mtx_lock(&msq_mtx);
1113		if (error != 0) {
1114			DPRINTF(("error (%d) copying out message segment\n",
1115			    error));
1116			msg_freehdr(msghdr);
1117			wakeup(msqptr);
1118			goto done2;
1119		}
1120		user_msgp = (char *)user_msgp + tlen;
1121		next = msgmaps[next].next;
1122	}
1123
1124	/*
1125	 * Done, return the actual number of bytes copied out.
1126	 */
1127
1128	msg_freehdr(msghdr);
1129	wakeup(msqptr);
1130	td->td_retval[0] = msgsz;
1131done2:
1132	mtx_unlock(&msq_mtx);
1133	return (error);
1134}
1135
1136static int
1137sysctl_msqids(SYSCTL_HANDLER_ARGS)
1138{
1139
1140	return (SYSCTL_OUT(req, msqids,
1141	    sizeof(struct msqid_ds) * msginfo.msgmni));
1142}
1143
1144SYSCTL_DECL(_kern_ipc);
1145SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
1146SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
1147SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
1148SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
1149SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
1150SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
1151SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
1152    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1153