sysv_msg.c revision 137613
1/*
2 * Implementation of SVID messages
3 *
4 * Author:  Daniel Boulet
5 *
6 * Copyright 1993 Daniel Boulet and RTMX Inc.
7 *
8 * This system call was implemented by Daniel Boulet under contract from RTMX.
9 *
10 * Redistribution and use in source forms, with and without modification,
11 * are permitted provided that this entire comment appears intact.
12 *
13 * Redistribution in binary form may occur without any restrictions.
14 * Obviously, it would be nice if you gave credit where credit is due
15 * but requiring it would be too onerous.
16 *
17 * This software is provided ``AS IS'' without any warranties of any kind.
18 */
19
20#include <sys/cdefs.h>
21__FBSDID("$FreeBSD: head/sys/kern/sysv_msg.c 137613 2004-11-12 13:23:47Z rwatson $");
22
23#include "opt_sysvipc.h"
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/sysproto.h>
28#include <sys/kernel.h>
29#include <sys/proc.h>
30#include <sys/lock.h>
31#include <sys/mutex.h>
32#include <sys/module.h>
33#include <sys/msg.h>
34#include <sys/syscall.h>
35#include <sys/sysent.h>
36#include <sys/sysctl.h>
37#include <sys/malloc.h>
38#include <sys/jail.h>
39
/* malloc(9) type used for every allocation made by this file. */
static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");

/* Module life-cycle helpers, defined below. */
static void msginit(void);
static int msgunload(void);
static int sysvmsg_modload(struct module *, int, void *);

/*
 * Debug tracing.  DPRINTF takes a single, fully parenthesized printf
 * argument list, e.g. DPRINTF(("x=%d\n", x)), and expands to nothing
 * unless MSG_DEBUG is defined.
 */
#ifdef MSG_DEBUG
#define DPRINTF(a)	printf a
#else
#define DPRINTF(a)
#endif

/* Returns a message header and its segments to the free lists. */
static void msg_freehdr(struct msg *msghdr);
53
/*
 * Dispatch table used by msgsys(): the "which" argument is the index
 * into this array, so the order of the entries must not change
 * (0 = msgctl, 1 = msgget, 2 = msgsnd, 3 = msgrcv).
 */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *msgcalls[] = {
	(sy_call_t *)msgctl, (sy_call_t *)msgget,
	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
};
59
/*
 * Compile-time defaults for the message limits.  Each one may be
 * overridden by defining it before this point; they only seed the
 * msginfo structure below.  msginit() may later replace the segment
 * size, segment count and queue count with boot-time tunables.
 */
#ifndef MSGSSZ
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#ifndef MSGSEG
#define MSGSEG	2048		/* must be less than 32767 */
#endif
/* Size of the message pool: every segment times the segment size. */
#define MSGMAX	(MSGSSZ*MSGSEG)
#ifndef MSGMNB
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#ifndef MSGMNI
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#ifndef MSGTQL
#define MSGTQL	40		/* max messages in system */
#endif
76
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
 *
 * The initializer below is positional; the comments name the limit that
 * each value sets.
 */
struct msginfo msginfo = {
                MSGMAX,         /* max chars in a message */
                MSGMNI,         /* # of message queue identifiers */
                MSGMNB,         /* max chars in a queue */
                MSGTQL,         /* max messages in system */
                MSGSSZ,         /* size of a message segment */
                		/* (must be small power of 2 greater than 4) */
                MSGSEG          /* number of message segments */
};
96
/*
 * Macros to convert between msqid_ds's and msqid's.
 * (specific to this implementation)
 *
 * A msqid packs the table index into the low 16 bits and the sequence
 * number of the msqid_ds into the next 16 bits:
 *
 *	MSQID(ix, ds)	build an id from index ix and ds.msg_perm.seq
 *	MSQID_IX(id)	extract the table index from an id
 *	MSQID_SEQ(id)	extract the sequence number from an id
 *
 * The two halves of MSQID are parenthesized explicitly so that the
 * expression does not depend on '&' binding tighter than '|'.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | (((ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
104
105/*
106 * The rest of this file is specific to this particular implementation.
107 */
108
/*
 * One entry per message segment.  Entries are chained through "next",
 * either on the free list headed by free_msgmaps or on the segment list
 * of a message headed by that message's msg_spot.
 */
struct msgmap {
	short	next;		/* next segment in buffer */
    				/* -1 -> end of the chain */
    				/* 0..(msginfo.msgseg-1) -> index of next segment */
};
114
/*
 * Flag kept in msg_perm.mode, above the 0777 permission bits, while a
 * sender has reserved the queue entry (see msgsnd()).
 */
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* All of the following are set up by msginit(). */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
static char *msgpool;		/* msginfo.msgmax byte long msg buffer pool */
static struct msgmap *msgmaps;	/* msginfo.msgseg msgmap structures */
static struct msg *msghdrs;	/* msginfo.msgtql msg headers */
static struct msqid_kernel *msqids;	/* msginfo.msgmni msqid_kernel struct's */
static struct mtx msq_mtx;	/* global mutex for message queues. */
125
126static void
127msginit()
128{
129	register int i;
130
131	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
132	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
133	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
134	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
135
136	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
137	if (msgpool == NULL)
138		panic("msgpool is NULL");
139	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
140	if (msgmaps == NULL)
141		panic("msgmaps is NULL");
142	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
143	if (msghdrs == NULL)
144		panic("msghdrs is NULL");
145	msqids = malloc(sizeof(struct msqid_kernel) * msginfo.msgmni, M_MSG,
146	    M_WAITOK);
147	if (msqids == NULL)
148		panic("msqids is NULL");
149
150	/*
151	 * msginfo.msgssz should be a power of two for efficiency reasons.
152	 * It is also pretty silly if msginfo.msgssz is less than 8
153	 * or greater than about 256 so ...
154	 */
155
156	i = 8;
157	while (i < 1024 && i != msginfo.msgssz)
158		i <<= 1;
159    	if (i != msginfo.msgssz) {
160		DPRINTF(("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
161		    msginfo.msgssz));
162		panic("msginfo.msgssz not a small power of 2");
163	}
164
165	if (msginfo.msgseg > 32767) {
166		DPRINTF(("msginfo.msgseg=%d\n", msginfo.msgseg));
167		panic("msginfo.msgseg > 32767");
168	}
169
170	if (msgmaps == NULL)
171		panic("msgmaps is NULL");
172
173	for (i = 0; i < msginfo.msgseg; i++) {
174		if (i > 0)
175			msgmaps[i-1].next = i;
176		msgmaps[i].next = -1;	/* implies entry is available */
177	}
178	free_msgmaps = 0;
179	nfree_msgmaps = msginfo.msgseg;
180
181	if (msghdrs == NULL)
182		panic("msghdrs is NULL");
183
184	for (i = 0; i < msginfo.msgtql; i++) {
185		msghdrs[i].msg_type = 0;
186		if (i > 0)
187			msghdrs[i-1].msg_next = &msghdrs[i];
188		msghdrs[i].msg_next = NULL;
189    	}
190	free_msghdrs = &msghdrs[0];
191
192	if (msqids == NULL)
193		panic("msqids is NULL");
194
195	for (i = 0; i < msginfo.msgmni; i++) {
196		msqids[i].u.msg_qbytes = 0;	/* implies entry is available */
197		msqids[i].u.msg_perm.seq = 0;	/* reset to a known value */
198		msqids[i].u.msg_perm.mode = 0;
199	}
200	mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
201}
202
203static int
204msgunload()
205{
206	struct msqid_kernel *msqkptr;
207	int msqid;
208
209	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
210		/*
211		 * Look for an unallocated and unlocked msqid_ds.
212		 * msqid_ds's can be locked by msgsnd or msgrcv while
213		 * they are copying the message in/out.  We can't
214		 * re-use the entry until they release it.
215		 */
216		msqkptr = &msqids[msqid];
217		if (msqkptr->u.msg_qbytes != 0 ||
218		    (msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
219			break;
220	}
221	if (msqid != msginfo.msgmni)
222		return (EBUSY);
223
224	free(msgpool, M_MSG);
225	free(msgmaps, M_MSG);
226	free(msghdrs, M_MSG);
227	free(msqids, M_MSG);
228	mtx_destroy(&msq_mtx);
229	return (0);
230}
231
232
233static int
234sysvmsg_modload(struct module *module, int cmd, void *arg)
235{
236	int error = 0;
237
238	switch (cmd) {
239	case MOD_LOAD:
240		msginit();
241		break;
242	case MOD_UNLOAD:
243		error = msgunload();
244		break;
245	case MOD_SHUTDOWN:
246		break;
247	default:
248		error = EINVAL;
249		break;
250	}
251	return (error);
252}
253
/* Module description: name, event handler and handler argument. */
static moduledata_t sysvmsg_mod = {
	"sysvmsg",
	&sysvmsg_modload,
	NULL
};

/*
 * NOTE(review): these presumably hook each system call up for module
 * load and unload; confirm against the SYSCALL_MODULE_HELPER definition.
 */
SYSCALL_MODULE_HELPER(msgsys);
SYSCALL_MODULE_HELPER(msgctl);
SYSCALL_MODULE_HELPER(msgget);
SYSCALL_MODULE_HELPER(msgsnd);
SYSCALL_MODULE_HELPER(msgrcv);

/* Register the module with the SI_SUB_SYSV_MSG subsystem. */
DECLARE_MODULE(sysvmsg, sysvmsg_mod,
	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
MODULE_VERSION(sysvmsg, 1);
269
270/*
271 * Entry point for all MSG calls
272 *
273 * MPSAFE
274 */
275int
276msgsys(td, uap)
277	struct thread *td;
278	/* XXX actually varargs. */
279	struct msgsys_args /* {
280		int	which;
281		int	a2;
282		int	a3;
283		int	a4;
284		int	a5;
285		int	a6;
286	} */ *uap;
287{
288	int error;
289
290	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
291		return (ENOSYS);
292	if (uap->which < 0 ||
293	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
294		return (EINVAL);
295	error = (*msgcalls[uap->which])(td, &uap->a2);
296	return (error);
297}
298
299static void
300msg_freehdr(msghdr)
301	struct msg *msghdr;
302{
303	while (msghdr->msg_ts > 0) {
304		short next;
305		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
306			panic("msghdr->msg_spot out of range");
307		next = msgmaps[msghdr->msg_spot].next;
308		msgmaps[msghdr->msg_spot].next = free_msgmaps;
309		free_msgmaps = msghdr->msg_spot;
310		nfree_msgmaps++;
311		msghdr->msg_spot = next;
312		if (msghdr->msg_ts >= msginfo.msgssz)
313			msghdr->msg_ts -= msginfo.msgssz;
314		else
315			msghdr->msg_ts = 0;
316	}
317	if (msghdr->msg_spot != -1)
318		panic("msghdr->msg_spot != -1");
319	msghdr->msg_next = free_msghdrs;
320	free_msghdrs = msghdr;
321}
322
323#ifndef _SYS_SYSPROTO_H_
324struct msgctl_args {
325	int	msqid;
326	int	cmd;
327	struct	msqid_ds *buf;
328};
329#endif
330
331/*
332 * MPSAFE
333 */
334int
335msgctl(td, uap)
336	struct thread *td;
337	register struct msgctl_args *uap;
338{
339	int msqid = uap->msqid;
340	int cmd = uap->cmd;
341	struct msqid_ds *user_msqptr = uap->buf;
342	int rval, error;
343	struct msqid_ds msqbuf;
344	register struct msqid_kernel *msqkptr;
345
346	DPRINTF(("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr));
347	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
348		return (ENOSYS);
349
350	msqid = IPCID_TO_IX(msqid);
351
352	if (msqid < 0 || msqid >= msginfo.msgmni) {
353		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
354		    msginfo.msgmni));
355		return (EINVAL);
356	}
357	if (cmd == IPC_SET &&
358	    (error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
359		return (error);
360
361	msqkptr = &msqids[msqid];
362
363	mtx_lock(&msq_mtx);
364	if (msqkptr->u.msg_qbytes == 0) {
365		DPRINTF(("no such msqid\n"));
366		error = EINVAL;
367		goto done2;
368	}
369	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
370		DPRINTF(("wrong sequence number\n"));
371		error = EINVAL;
372		goto done2;
373	}
374
375	error = 0;
376	rval = 0;
377
378	switch (cmd) {
379
380	case IPC_RMID:
381	{
382		struct msg *msghdr;
383		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
384			goto done2;
385
386		/* Free the message headers */
387		msghdr = msqkptr->u.msg_first;
388		while (msghdr != NULL) {
389			struct msg *msghdr_tmp;
390
391			/* Free the segments of each message */
392			msqkptr->u.msg_cbytes -= msghdr->msg_ts;
393			msqkptr->u.msg_qnum--;
394			msghdr_tmp = msghdr;
395			msghdr = msghdr->msg_next;
396			msg_freehdr(msghdr_tmp);
397		}
398
399		if (msqkptr->u.msg_cbytes != 0)
400			panic("msg_cbytes is screwed up");
401		if (msqkptr->u.msg_qnum != 0)
402			panic("msg_qnum is screwed up");
403
404		msqkptr->u.msg_qbytes = 0;	/* Mark it as free */
405
406		wakeup(msqkptr);
407	}
408
409		break;
410
411	case IPC_SET:
412		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
413			goto done2;
414		if (msqbuf.msg_qbytes > msqkptr->u.msg_qbytes) {
415			error = suser(td);
416			if (error)
417				goto done2;
418		}
419		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
420			DPRINTF(("can't increase msg_qbytes beyond %d"
421			    "(truncating)\n", msginfo.msgmnb));
422			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
423		}
424		if (msqbuf.msg_qbytes == 0) {
425			DPRINTF(("can't reduce msg_qbytes to 0\n"));
426			error = EINVAL;		/* non-standard errno! */
427			goto done2;
428		}
429		msqkptr->u.msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
430		msqkptr->u.msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
431		msqkptr->u.msg_perm.mode = (msqkptr->u.msg_perm.mode & ~0777) |
432		    (msqbuf.msg_perm.mode & 0777);
433		msqkptr->u.msg_qbytes = msqbuf.msg_qbytes;
434		msqkptr->u.msg_ctime = time_second;
435		break;
436
437	case IPC_STAT:
438		if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
439			DPRINTF(("requester doesn't have read access\n"));
440			goto done2;
441		}
442		break;
443
444	default:
445		DPRINTF(("invalid command %d\n", cmd));
446		error = EINVAL;
447		goto done2;
448	}
449
450	if (error == 0)
451		td->td_retval[0] = rval;
452done2:
453	mtx_unlock(&msq_mtx);
454	if (cmd == IPC_STAT && error == 0)
455		error = copyout(&(msqkptr->u), user_msqptr, sizeof(struct msqid_ds));
456	return(error);
457}
458
459#ifndef _SYS_SYSPROTO_H_
460struct msgget_args {
461	key_t	key;
462	int	msgflg;
463};
464#endif
465
466/*
467 * MPSAFE
468 */
469int
470msgget(td, uap)
471	struct thread *td;
472	register struct msgget_args *uap;
473{
474	int msqid, error = 0;
475	int key = uap->key;
476	int msgflg = uap->msgflg;
477	struct ucred *cred = td->td_ucred;
478	register struct msqid_kernel *msqkptr = NULL;
479
480	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
481
482	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
483		return (ENOSYS);
484
485	mtx_lock(&msq_mtx);
486	if (key != IPC_PRIVATE) {
487		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
488			msqkptr = &msqids[msqid];
489			if (msqkptr->u.msg_qbytes != 0 &&
490			    msqkptr->u.msg_perm.key == key)
491				break;
492		}
493		if (msqid < msginfo.msgmni) {
494			DPRINTF(("found public key\n"));
495			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
496				DPRINTF(("not exclusive\n"));
497				error = EEXIST;
498				goto done2;
499			}
500			if ((error = ipcperm(td, &msqkptr->u.msg_perm,
501			    msgflg & 0700))) {
502				DPRINTF(("requester doesn't have 0%o access\n",
503				    msgflg & 0700));
504				goto done2;
505			}
506			goto found;
507		}
508	}
509
510	DPRINTF(("need to allocate the msqid_ds\n"));
511	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
512		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
513			/*
514			 * Look for an unallocated and unlocked msqid_ds.
515			 * msqid_ds's can be locked by msgsnd or msgrcv while
516			 * they are copying the message in/out.  We can't
517			 * re-use the entry until they release it.
518			 */
519			msqkptr = &msqids[msqid];
520			if (msqkptr->u.msg_qbytes == 0 &&
521			    (msqkptr->u.msg_perm.mode & MSG_LOCKED) == 0)
522				break;
523		}
524		if (msqid == msginfo.msgmni) {
525			DPRINTF(("no more msqid_ds's available\n"));
526			error = ENOSPC;
527			goto done2;
528		}
529		DPRINTF(("msqid %d is available\n", msqid));
530		msqkptr->u.msg_perm.key = key;
531		msqkptr->u.msg_perm.cuid = cred->cr_uid;
532		msqkptr->u.msg_perm.uid = cred->cr_uid;
533		msqkptr->u.msg_perm.cgid = cred->cr_gid;
534		msqkptr->u.msg_perm.gid = cred->cr_gid;
535		msqkptr->u.msg_perm.mode = (msgflg & 0777);
536		/* Make sure that the returned msqid is unique */
537		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
538		msqkptr->u.msg_first = NULL;
539		msqkptr->u.msg_last = NULL;
540		msqkptr->u.msg_cbytes = 0;
541		msqkptr->u.msg_qnum = 0;
542		msqkptr->u.msg_qbytes = msginfo.msgmnb;
543		msqkptr->u.msg_lspid = 0;
544		msqkptr->u.msg_lrpid = 0;
545		msqkptr->u.msg_stime = 0;
546		msqkptr->u.msg_rtime = 0;
547		msqkptr->u.msg_ctime = time_second;
548	} else {
549		DPRINTF(("didn't find it and wasn't asked to create it\n"));
550		error = ENOENT;
551		goto done2;
552	}
553
554found:
555	/* Construct the unique msqid */
556	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqkptr->u.msg_perm);
557done2:
558	mtx_unlock(&msq_mtx);
559	return (error);
560}
561
562#ifndef _SYS_SYSPROTO_H_
563struct msgsnd_args {
564	int	msqid;
565	const void	*msgp;
566	size_t	msgsz;
567	int	msgflg;
568};
569#endif
570
571/*
572 * MPSAFE
573 */
574int
575msgsnd(td, uap)
576	struct thread *td;
577	register struct msgsnd_args *uap;
578{
579	int msqid = uap->msqid;
580	const void *user_msgp = uap->msgp;
581	size_t msgsz = uap->msgsz;
582	int msgflg = uap->msgflg;
583	int segs_needed, error = 0;
584	register struct msqid_kernel *msqkptr;
585	register struct msg *msghdr;
586	short next;
587
588	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
589	    msgflg));
590	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
591		return (ENOSYS);
592
593	mtx_lock(&msq_mtx);
594	msqid = IPCID_TO_IX(msqid);
595
596	if (msqid < 0 || msqid >= msginfo.msgmni) {
597		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
598		    msginfo.msgmni));
599		error = EINVAL;
600		goto done2;
601	}
602
603	msqkptr = &msqids[msqid];
604	if (msqkptr->u.msg_qbytes == 0) {
605		DPRINTF(("no such message queue id\n"));
606		error = EINVAL;
607		goto done2;
608	}
609	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
610		DPRINTF(("wrong sequence number\n"));
611		error = EINVAL;
612		goto done2;
613	}
614
615	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
616		DPRINTF(("requester doesn't have write access\n"));
617		goto done2;
618	}
619
620	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
621	DPRINTF(("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
622	    segs_needed));
623	for (;;) {
624		int need_more_resources = 0;
625
626		/*
627		 * check msgsz
628		 * (inside this loop in case msg_qbytes changes while we sleep)
629		 */
630
631		if (msgsz > msqkptr->u.msg_qbytes) {
632			DPRINTF(("msgsz > msqkptr->u.msg_qbytes\n"));
633			error = EINVAL;
634			goto done2;
635		}
636
637		if (msqkptr->u.msg_perm.mode & MSG_LOCKED) {
638			DPRINTF(("msqid is locked\n"));
639			need_more_resources = 1;
640		}
641		if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes) {
642			DPRINTF(("msgsz + msg_cbytes > msg_qbytes\n"));
643			need_more_resources = 1;
644		}
645		if (segs_needed > nfree_msgmaps) {
646			DPRINTF(("segs_needed > nfree_msgmaps\n"));
647			need_more_resources = 1;
648		}
649		if (free_msghdrs == NULL) {
650			DPRINTF(("no more msghdrs\n"));
651			need_more_resources = 1;
652		}
653
654		if (need_more_resources) {
655			int we_own_it;
656
657			if ((msgflg & IPC_NOWAIT) != 0) {
658				DPRINTF(("need more resources but caller "
659				    "doesn't want to wait\n"));
660				error = EAGAIN;
661				goto done2;
662			}
663
664			if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0) {
665				DPRINTF(("we don't own the msqid_ds\n"));
666				we_own_it = 0;
667			} else {
668				/* Force later arrivals to wait for our
669				   request */
670				DPRINTF(("we own the msqid_ds\n"));
671				msqkptr->u.msg_perm.mode |= MSG_LOCKED;
672				we_own_it = 1;
673			}
674			DPRINTF(("goodnight\n"));
675			error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
676			    "msgwait", 0);
677			DPRINTF(("good morning, error=%d\n", error));
678			if (we_own_it)
679				msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
680			if (error != 0) {
681				DPRINTF(("msgsnd:  interrupted system call\n"));
682				error = EINTR;
683				goto done2;
684			}
685
686			/*
687			 * Make sure that the msq queue still exists
688			 */
689
690			if (msqkptr->u.msg_qbytes == 0) {
691				DPRINTF(("msqid deleted\n"));
692				error = EIDRM;
693				goto done2;
694			}
695
696		} else {
697			DPRINTF(("got all the resources that we need\n"));
698			break;
699		}
700	}
701
702	/*
703	 * We have the resources that we need.
704	 * Make sure!
705	 */
706
707	if (msqkptr->u.msg_perm.mode & MSG_LOCKED)
708		panic("msg_perm.mode & MSG_LOCKED");
709	if (segs_needed > nfree_msgmaps)
710		panic("segs_needed > nfree_msgmaps");
711	if (msgsz + msqkptr->u.msg_cbytes > msqkptr->u.msg_qbytes)
712		panic("msgsz + msg_cbytes > msg_qbytes");
713	if (free_msghdrs == NULL)
714		panic("no more msghdrs");
715
716	/*
717	 * Re-lock the msqid_ds in case we page-fault when copying in the
718	 * message
719	 */
720
721	if ((msqkptr->u.msg_perm.mode & MSG_LOCKED) != 0)
722		panic("msqid_ds is already locked");
723	msqkptr->u.msg_perm.mode |= MSG_LOCKED;
724
725	/*
726	 * Allocate a message header
727	 */
728
729	msghdr = free_msghdrs;
730	free_msghdrs = msghdr->msg_next;
731	msghdr->msg_spot = -1;
732	msghdr->msg_ts = msgsz;
733
734	/*
735	 * Allocate space for the message
736	 */
737
738	while (segs_needed > 0) {
739		if (nfree_msgmaps <= 0)
740			panic("not enough msgmaps");
741		if (free_msgmaps == -1)
742			panic("nil free_msgmaps");
743		next = free_msgmaps;
744		if (next <= -1)
745			panic("next too low #1");
746		if (next >= msginfo.msgseg)
747			panic("next out of range #1");
748		DPRINTF(("allocating segment %d to message\n", next));
749		free_msgmaps = msgmaps[next].next;
750		nfree_msgmaps--;
751		msgmaps[next].next = msghdr->msg_spot;
752		msghdr->msg_spot = next;
753		segs_needed--;
754	}
755
756	/*
757	 * Copy in the message type
758	 */
759
760	mtx_unlock(&msq_mtx);
761	if ((error = copyin(user_msgp, &msghdr->msg_type,
762	    sizeof(msghdr->msg_type))) != 0) {
763		mtx_lock(&msq_mtx);
764		DPRINTF(("error %d copying the message type\n", error));
765		msg_freehdr(msghdr);
766		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
767		wakeup(msqkptr);
768		goto done2;
769	}
770	mtx_lock(&msq_mtx);
771	user_msgp = (const char *)user_msgp + sizeof(msghdr->msg_type);
772
773	/*
774	 * Validate the message type
775	 */
776
777	if (msghdr->msg_type < 1) {
778		msg_freehdr(msghdr);
779		msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
780		wakeup(msqkptr);
781		DPRINTF(("mtype (%d) < 1\n", msghdr->msg_type));
782		error = EINVAL;
783		goto done2;
784	}
785
786	/*
787	 * Copy in the message body
788	 */
789
790	next = msghdr->msg_spot;
791	while (msgsz > 0) {
792		size_t tlen;
793		if (msgsz > msginfo.msgssz)
794			tlen = msginfo.msgssz;
795		else
796			tlen = msgsz;
797		if (next <= -1)
798			panic("next too low #2");
799		if (next >= msginfo.msgseg)
800			panic("next out of range #2");
801		mtx_unlock(&msq_mtx);
802		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
803		    tlen)) != 0) {
804			mtx_lock(&msq_mtx);
805			DPRINTF(("error %d copying in message segment\n",
806			    error));
807			msg_freehdr(msghdr);
808			msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
809			wakeup(msqkptr);
810			goto done2;
811		}
812		mtx_lock(&msq_mtx);
813		msgsz -= tlen;
814		user_msgp = (const char *)user_msgp + tlen;
815		next = msgmaps[next].next;
816	}
817	if (next != -1)
818		panic("didn't use all the msg segments");
819
820	/*
821	 * We've got the message.  Unlock the msqid_ds.
822	 */
823
824	msqkptr->u.msg_perm.mode &= ~MSG_LOCKED;
825
826	/*
827	 * Make sure that the msqid_ds is still allocated.
828	 */
829
830	if (msqkptr->u.msg_qbytes == 0) {
831		msg_freehdr(msghdr);
832		wakeup(msqkptr);
833		error = EIDRM;
834		goto done2;
835	}
836
837	/*
838	 * Put the message into the queue
839	 */
840	if (msqkptr->u.msg_first == NULL) {
841		msqkptr->u.msg_first = msghdr;
842		msqkptr->u.msg_last = msghdr;
843	} else {
844		msqkptr->u.msg_last->msg_next = msghdr;
845		msqkptr->u.msg_last = msghdr;
846	}
847	msqkptr->u.msg_last->msg_next = NULL;
848
849	msqkptr->u.msg_cbytes += msghdr->msg_ts;
850	msqkptr->u.msg_qnum++;
851	msqkptr->u.msg_lspid = td->td_proc->p_pid;
852	msqkptr->u.msg_stime = time_second;
853
854	wakeup(msqkptr);
855	td->td_retval[0] = 0;
856done2:
857	mtx_unlock(&msq_mtx);
858	return (error);
859}
860
861#ifndef _SYS_SYSPROTO_H_
862struct msgrcv_args {
863	int	msqid;
864	void	*msgp;
865	size_t	msgsz;
866	long	msgtyp;
867	int	msgflg;
868};
869#endif
870
871/*
872 * MPSAFE
873 */
874int
875msgrcv(td, uap)
876	struct thread *td;
877	register struct msgrcv_args *uap;
878{
879	int msqid = uap->msqid;
880	void *user_msgp = uap->msgp;
881	size_t msgsz = uap->msgsz;
882	long msgtyp = uap->msgtyp;
883	int msgflg = uap->msgflg;
884	size_t len;
885	register struct msqid_kernel *msqkptr;
886	register struct msg *msghdr;
887	int error = 0;
888	short next;
889
890	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
891	    msgsz, msgtyp, msgflg));
892
893	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
894		return (ENOSYS);
895
896	msqid = IPCID_TO_IX(msqid);
897
898	if (msqid < 0 || msqid >= msginfo.msgmni) {
899		DPRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
900		    msginfo.msgmni));
901		return (EINVAL);
902	}
903
904	msqkptr = &msqids[msqid];
905	mtx_lock(&msq_mtx);
906	if (msqkptr->u.msg_qbytes == 0) {
907		DPRINTF(("no such message queue id\n"));
908		error = EINVAL;
909		goto done2;
910	}
911	if (msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
912		DPRINTF(("wrong sequence number\n"));
913		error = EINVAL;
914		goto done2;
915	}
916
917	if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
918		DPRINTF(("requester doesn't have read access\n"));
919		goto done2;
920	}
921
922	msghdr = NULL;
923	while (msghdr == NULL) {
924		if (msgtyp == 0) {
925			msghdr = msqkptr->u.msg_first;
926			if (msghdr != NULL) {
927				if (msgsz < msghdr->msg_ts &&
928				    (msgflg & MSG_NOERROR) == 0) {
929					DPRINTF(("first message on the queue "
930					    "is too big (want %d, got %d)\n",
931					    msgsz, msghdr->msg_ts));
932					error = E2BIG;
933					goto done2;
934				}
935				if (msqkptr->u.msg_first == msqkptr->u.msg_last) {
936					msqkptr->u.msg_first = NULL;
937					msqkptr->u.msg_last = NULL;
938				} else {
939					msqkptr->u.msg_first = msghdr->msg_next;
940					if (msqkptr->u.msg_first == NULL)
941						panic("msg_first/last screwed up #1");
942				}
943			}
944		} else {
945			struct msg *previous;
946			struct msg **prev;
947
948			previous = NULL;
949			prev = &(msqkptr->u.msg_first);
950			while ((msghdr = *prev) != NULL) {
951				/*
952				 * Is this message's type an exact match or is
953				 * this message's type less than or equal to
954				 * the absolute value of a negative msgtyp?
955				 * Note that the second half of this test can
956				 * NEVER be true if msgtyp is positive since
957				 * msg_type is always positive!
958				 */
959
960				if (msgtyp == msghdr->msg_type ||
961				    msghdr->msg_type <= -msgtyp) {
962					DPRINTF(("found message type %d, "
963					    "requested %d\n",
964					    msghdr->msg_type, msgtyp));
965					if (msgsz < msghdr->msg_ts &&
966					    (msgflg & MSG_NOERROR) == 0) {
967						DPRINTF(("requested message "
968						    "on the queue is too big "
969						    "(want %d, got %d)\n",
970						    msgsz, msghdr->msg_ts));
971						error = E2BIG;
972						goto done2;
973					}
974					*prev = msghdr->msg_next;
975					if (msghdr == msqkptr->u.msg_last) {
976						if (previous == NULL) {
977							if (prev !=
978							    &msqkptr->u.msg_first)
979								panic("msg_first/last screwed up #2");
980							msqkptr->u.msg_first =
981							    NULL;
982							msqkptr->u.msg_last =
983							    NULL;
984						} else {
985							if (prev ==
986							    &msqkptr->u.msg_first)
987								panic("msg_first/last screwed up #3");
988							msqkptr->u.msg_last =
989							    previous;
990						}
991					}
992					break;
993				}
994				previous = msghdr;
995				prev = &(msghdr->msg_next);
996			}
997		}
998
999		/*
1000		 * We've either extracted the msghdr for the appropriate
1001		 * message or there isn't one.
1002		 * If there is one then bail out of this loop.
1003		 */
1004
1005		if (msghdr != NULL)
1006			break;
1007
1008		/*
1009		 * Hmph!  No message found.  Does the user want to wait?
1010		 */
1011
1012		if ((msgflg & IPC_NOWAIT) != 0) {
1013			DPRINTF(("no appropriate message found (msgtyp=%d)\n",
1014			    msgtyp));
1015			/* The SVID says to return ENOMSG. */
1016			error = ENOMSG;
1017			goto done2;
1018		}
1019
1020		/*
1021		 * Wait for something to happen
1022		 */
1023
1024		DPRINTF(("msgrcv:  goodnight\n"));
1025		error = msleep(msqkptr, &msq_mtx, (PZERO - 4) | PCATCH,
1026		    "msgwait", 0);
1027		DPRINTF(("msgrcv:  good morning (error=%d)\n", error));
1028
1029		if (error != 0) {
1030			DPRINTF(("msgsnd:  interrupted system call\n"));
1031			error = EINTR;
1032			goto done2;
1033		}
1034
1035		/*
1036		 * Make sure that the msq queue still exists
1037		 */
1038
1039		if (msqkptr->u.msg_qbytes == 0 ||
1040		    msqkptr->u.msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1041			DPRINTF(("msqid deleted\n"));
1042			error = EIDRM;
1043			goto done2;
1044		}
1045	}
1046
1047	/*
1048	 * Return the message to the user.
1049	 *
1050	 * First, do the bookkeeping (before we risk being interrupted).
1051	 */
1052
1053	msqkptr->u.msg_cbytes -= msghdr->msg_ts;
1054	msqkptr->u.msg_qnum--;
1055	msqkptr->u.msg_lrpid = td->td_proc->p_pid;
1056	msqkptr->u.msg_rtime = time_second;
1057
1058	/*
1059	 * Make msgsz the actual amount that we'll be returning.
1060	 * Note that this effectively truncates the message if it is too long
1061	 * (since msgsz is never increased).
1062	 */
1063
1064	DPRINTF(("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1065	    msghdr->msg_ts));
1066	if (msgsz > msghdr->msg_ts)
1067		msgsz = msghdr->msg_ts;
1068
1069	/*
1070	 * Return the type to the user.
1071	 */
1072
1073	mtx_unlock(&msq_mtx);
1074	error = copyout(&(msghdr->msg_type), user_msgp,
1075	    sizeof(msghdr->msg_type));
1076	mtx_lock(&msq_mtx);
1077	if (error != 0) {
1078		DPRINTF(("error (%d) copying out message type\n", error));
1079		msg_freehdr(msghdr);
1080		wakeup(msqkptr);
1081		goto done2;
1082	}
1083	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1084
1085	/*
1086	 * Return the segments to the user
1087	 */
1088
1089	next = msghdr->msg_spot;
1090	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1091		size_t tlen;
1092
1093		if (msgsz - len > msginfo.msgssz)
1094			tlen = msginfo.msgssz;
1095		else
1096			tlen = msgsz - len;
1097		if (next <= -1)
1098			panic("next too low #3");
1099		if (next >= msginfo.msgseg)
1100			panic("next out of range #3");
1101		mtx_unlock(&msq_mtx);
1102		error = copyout(&msgpool[next * msginfo.msgssz],
1103		    user_msgp, tlen);
1104		mtx_lock(&msq_mtx);
1105		if (error != 0) {
1106			DPRINTF(("error (%d) copying out message segment\n",
1107			    error));
1108			msg_freehdr(msghdr);
1109			wakeup(msqkptr);
1110			goto done2;
1111		}
1112		user_msgp = (char *)user_msgp + tlen;
1113		next = msgmaps[next].next;
1114	}
1115
1116	/*
1117	 * Done, return the actual number of bytes copied out.
1118	 */
1119
1120	msg_freehdr(msghdr);
1121	wakeup(msqkptr);
1122	td->td_retval[0] = msgsz;
1123done2:
1124	mtx_unlock(&msq_mtx);
1125	return (error);
1126}
1127
1128static int
1129sysctl_msqids(SYSCTL_HANDLER_ARGS)
1130{
1131
1132	return (SYSCTL_OUT(req, msqids,
1133	    sizeof(struct msqid_kernel) * msginfo.msgmni));
1134}
1135
/*
 * Export the limits in msginfo, read-only, under kern.ipc.  The three
 * that msginit() fetches as tunables (msgmni, msgssz, msgseg) are
 * flagged CTLFLAG_RDTUN; the others are plain CTLFLAG_RD.
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RDTUN, &msginfo.msgmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RDTUN, &msginfo.msgssz, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0, "");
/* The queue table itself, served by sysctl_msqids(). */
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1145