sysv_msg.c revision 100511
11573Srgrimes/* $FreeBSD: head/sys/kern/sysv_msg.c 100511 2002-07-22 16:12:55Z alfred $ */
21573Srgrimes
31573Srgrimes/*
41573Srgrimes * Implementation of SVID messages
51573Srgrimes *
61573Srgrimes * Author:  Daniel Boulet
71573Srgrimes *
81573Srgrimes * Copyright 1993 Daniel Boulet and RTMX Inc.
91573Srgrimes *
101573Srgrimes * This system call was implemented by Daniel Boulet under contract from RTMX.
111573Srgrimes *
121573Srgrimes * Redistribution and use in source forms, with and without modification,
131573Srgrimes * are permitted provided that this entire comment appears intact.
14251069Semaste *
151573Srgrimes * Redistribution in binary form may occur without any restrictions.
161573Srgrimes * Obviously, it would be nice if you gave credit where credit is due
171573Srgrimes * but requiring it would be too onerous.
181573Srgrimes *
191573Srgrimes * This software is provided ``AS IS'' without any warranties of any kind.
201573Srgrimes */
211573Srgrimes
221573Srgrimes#include "opt_sysvipc.h"
231573Srgrimes
241573Srgrimes#include <sys/param.h>
251573Srgrimes#include <sys/systm.h>
261573Srgrimes#include <sys/sysproto.h>
271573Srgrimes#include <sys/kernel.h>
281573Srgrimes#include <sys/proc.h>
291573Srgrimes#include <sys/lock.h>
301573Srgrimes#include <sys/mutex.h>
3150476Speter#include <sys/msg.h>
321573Srgrimes#include <sys/syscall.h>
33289467Scem#include <sys/sysent.h>
341573Srgrimes#include <sys/sysctl.h>
351573Srgrimes#include <sys/malloc.h>
361573Srgrimes#include <sys/jail.h>
37124483Sdes
38124483Sdesstatic MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
39184587Skib
40124483Sdesstatic void msginit(void);
41184587Skibstatic int msgunload(void);
42184587Skibstatic int sysvmsg_modload(struct module *, int, void *);
43124483Sdes
4459460Sphantom#define MSG_DEBUG
4559460Sphantom#undef MSG_DEBUG_OK
461573Srgrimes
47102635Srobertstatic void msg_freehdr(struct msg *msghdr);
481573Srgrimes
491573Srgrimes/* XXX casting to (sy_call_t *) is bogus, as usual. */
50124483Sdesstatic sy_call_t *msgcalls[] = {
51124483Sdes	(sy_call_t *)msgctl, (sy_call_t *)msgget,
52124483Sdes	(sy_call_t *)msgsnd, (sy_call_t *)msgrcv
53221401Sdelphij};
54184587Skib
55124483Sdesstruct msg {
56124483Sdes	struct	msg *msg_next;	/* next msg in the chain */
57124483Sdes	long	msg_type;	/* type of this message */
58184587Skib    				/* >0 -> type of this message */
59184587Skib    				/* 0 -> free header */
601573Srgrimes	u_short	msg_ts;		/* size of this message */
611573Srgrimes	short	msg_spot;	/* location of start of msg in buffer */
62184587Skib};
63184587Skib
64124483Sdes
/*
 * Compile-time defaults for the message limits.  Each one is only defined
 * here when it has not already been defined.
 */
#if !defined(MSGSSZ)
#define MSGSSZ	8		/* Each segment must be 2^N long */
#endif
#if !defined(MSGSEG)
#define MSGSEG	2048		/* must not exceed 32767 */
#endif
#define MSGMAX	(MSGSSZ * MSGSEG)
#if !defined(MSGMNB)
#define MSGMNB	2048		/* max # of bytes in a queue */
#endif
#if !defined(MSGMNI)
#define MSGMNI	40		/* # of message queue identifiers */
#endif
#if !defined(MSGTQL)
#define MSGTQL	40		/* max messages in system */
#endif
81124483Sdes
/*
 * Based on the configuration parameters described in an SVR2 (yes, two)
 * config(1m) man page.
 *
 * Each message is broken up and stored in segments that are msgssz bytes
 * long.  For efficiency reasons, this should be a power of two.  Also,
 * it doesn't make sense if it is less than 8 or greater than about 256.
 * Consequently, msginit in kern/sysv_msg.c checks that msgssz is a power of
 * two between 8 and 1024 inclusive (and panics if it isn't).
 */
92119893Srustruct msginfo msginfo = {
93102635Srobert                MSGMAX,         /* max chars in a message */
94119893Sru                MSGMNI,         /* # of message queue identifiers */
95102635Srobert                MSGMNB,         /* max chars in a queue */
96102635Srobert                MSGTQL,         /* max messages in system */
97102635Srobert                MSGSSZ,         /* size of a message segment */
98124483Sdes                		/* (must be small power of 2 greater than 4) */
99124483Sdes                MSGSEG          /* number of message segments */
100124483Sdes};
101124483Sdes
/*
 * Macros to convert between msqid_ds's and msqid's
 * (specific to this implementation).
 *
 * A msqid carries the table index in its low 16 bits and the slot's
 * sequence number in its high 16 bits.  The sequence number is widened to
 * unsigned before shifting so that values with bit 15 set are not shifted
 * into the sign bit of an int.
 */
#define MSQID(ix,ds)	(((ix) & 0xffff) | \
			    (((unsigned int)(ds).msg_perm.seq << 16) & 0xffff0000))
#define MSQID_IX(id)	((id) & 0xffff)
#define MSQID_SEQ(id)	(((id) >> 16) & 0xffff)
109184587Skib
110184587Skib/*
111187147Skib * The rest of this file is specific to this particular implementation.
112 */
113
/*
 * One entry per message segment.  Entries are chained by index, both for
 * the segments of a message and for the free list.
 */
struct msgmap {
	short next;	/* 0..(MSGSEG-1) -> index of next segment */
			/* -1 -> no further segment in this chain */
};
119
#define MSG_LOCKED	01000	/* Is this msqid_ds locked? */

/* Tables allocated by msginit() and released by msgunload(). */
static char *msgpool;		/* MSGMAX byte long msg buffer pool */
static struct msgmap *msgmaps;	/* MSGSEG msgmap structures */
static struct msg *msghdrs;	/* MSGTQL msg headers */
static struct msqid_ds *msqids;	/* MSGMNI msqid_ds struct's */

/* Free-list bookkeeping for the segment map and the message headers. */
static int nfree_msgmaps;	/* # of free map entries */
static short free_msgmaps;	/* head of linked list of free map entries */
static struct msg *free_msghdrs;/* list of free msg headers */
129
130static void
131msginit()
132{
133	register int i;
134
135	TUNABLE_INT_FETCH("kern.ipc.msgseg", &msginfo.msgseg);
136	TUNABLE_INT_FETCH("kern.ipc.msgssz", &msginfo.msgssz);
137	msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
138	TUNABLE_INT_FETCH("kern.ipc.msgmni", &msginfo.msgmni);
139
140	msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
141	if (msgpool == NULL)
142		panic("msgpool is NULL");
143	msgmaps = malloc(sizeof(struct msgmap) * msginfo.msgseg, M_MSG, M_WAITOK);
144	if (msgmaps == NULL)
145		panic("msgmaps is NULL");
146	msghdrs = malloc(sizeof(struct msg) * msginfo.msgtql, M_MSG, M_WAITOK);
147	if (msghdrs == NULL)
148		panic("msghdrs is NULL");
149	msqids = malloc(sizeof(struct msqid_ds) * msginfo.msgmni, M_MSG, M_WAITOK);
150	if (msqids == NULL)
151		panic("msqids is NULL");
152
153	/*
154	 * msginfo.msgssz should be a power of two for efficiency reasons.
155	 * It is also pretty silly if msginfo.msgssz is less than 8
156	 * or greater than about 256 so ...
157	 */
158
159	i = 8;
160	while (i < 1024 && i != msginfo.msgssz)
161		i <<= 1;
162    	if (i != msginfo.msgssz) {
163		printf("msginfo.msgssz=%d (0x%x)\n", msginfo.msgssz,
164		    msginfo.msgssz);
165		panic("msginfo.msgssz not a small power of 2");
166	}
167
168	if (msginfo.msgseg > 32767) {
169		printf("msginfo.msgseg=%d\n", msginfo.msgseg);
170		panic("msginfo.msgseg > 32767");
171	}
172
173	if (msgmaps == NULL)
174		panic("msgmaps is NULL");
175
176	for (i = 0; i < msginfo.msgseg; i++) {
177		if (i > 0)
178			msgmaps[i-1].next = i;
179		msgmaps[i].next = -1;	/* implies entry is available */
180	}
181	free_msgmaps = 0;
182	nfree_msgmaps = msginfo.msgseg;
183
184	if (msghdrs == NULL)
185		panic("msghdrs is NULL");
186
187	for (i = 0; i < msginfo.msgtql; i++) {
188		msghdrs[i].msg_type = 0;
189		if (i > 0)
190			msghdrs[i-1].msg_next = &msghdrs[i];
191		msghdrs[i].msg_next = NULL;
192    	}
193	free_msghdrs = &msghdrs[0];
194
195	if (msqids == NULL)
196		panic("msqids is NULL");
197
198	for (i = 0; i < msginfo.msgmni; i++) {
199		msqids[i].msg_qbytes = 0;	/* implies entry is available */
200		msqids[i].msg_perm.seq = 0;	/* reset to a known value */
201		msqids[i].msg_perm.mode = 0;
202	}
203}
204
205static int
206msgunload()
207{
208	struct msqid_ds *msqptr;
209	int msqid;
210
211	for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
212		/*
213		 * Look for an unallocated and unlocked msqid_ds.
214		 * msqid_ds's can be locked by msgsnd or msgrcv while
215		 * they are copying the message in/out.  We can't
216		 * re-use the entry until they release it.
217		 */
218		msqptr = &msqids[msqid];
219		if (msqptr->msg_qbytes != 0 ||
220		    (msqptr->msg_perm.mode & MSG_LOCKED) != 0)
221			break;
222	}
223	if (msqid != msginfo.msgmni)
224		return (EBUSY);
225
226	free(msgpool, M_MSG);
227	free(msgmaps, M_MSG);
228	free(msghdrs, M_MSG);
229	free(msqids, M_MSG);
230	return (0);
231}
232
233
234static int
235sysvmsg_modload(struct module *module, int cmd, void *arg)
236{
237	int error = 0;
238
239	switch (cmd) {
240	case MOD_LOAD:
241		msginit();
242		break;
243	case MOD_UNLOAD:
244		error = msgunload();
245		break;
246	case MOD_SHUTDOWN:
247		break;
248	default:
249		error = EINVAL;
250		break;
251	}
252	return (error);
253}
254
/*
 * Module descriptor: the module name, its event handler, and a third
 * field that is left NULL.
 */
static moduledata_t sysvmsg_mod = {
	"sysvmsg",
	&sysvmsg_modload,
	NULL
};

/*
 * NOTE(review): presumably these emit the per-syscall module glue for each
 * entry point -- confirm against the definition of SYSCALL_MODULE_HELPER.
 */
SYSCALL_MODULE_HELPER(msgsys);
SYSCALL_MODULE_HELPER(msgctl);
SYSCALL_MODULE_HELPER(msgget);
SYSCALL_MODULE_HELPER(msgsnd);
SYSCALL_MODULE_HELPER(msgrcv);

/* Register the module and publish its version number. */
DECLARE_MODULE(sysvmsg, sysvmsg_mod,
	SI_SUB_SYSV_MSG, SI_ORDER_FIRST);
MODULE_VERSION(sysvmsg, 1);
270
271/*
272 * Entry point for all MSG calls
273 *
274 * MPSAFE
275 */
276int
277msgsys(td, uap)
278	struct thread *td;
279	/* XXX actually varargs. */
280	struct msgsys_args /* {
281		u_int	which;
282		int	a2;
283		int	a3;
284		int	a4;
285		int	a5;
286		int	a6;
287	} */ *uap;
288{
289	int error;
290
291	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
292		return (ENOSYS);
293	if (uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
294		return (EINVAL);
295	mtx_lock(&Giant);
296	error = (*msgcalls[uap->which])(td, &uap->a2);
297	mtx_unlock(&Giant);
298	return (error);
299}
300
301static void
302msg_freehdr(msghdr)
303	struct msg *msghdr;
304{
305	while (msghdr->msg_ts > 0) {
306		short next;
307		if (msghdr->msg_spot < 0 || msghdr->msg_spot >= msginfo.msgseg)
308			panic("msghdr->msg_spot out of range");
309		next = msgmaps[msghdr->msg_spot].next;
310		msgmaps[msghdr->msg_spot].next = free_msgmaps;
311		free_msgmaps = msghdr->msg_spot;
312		nfree_msgmaps++;
313		msghdr->msg_spot = next;
314		if (msghdr->msg_ts >= msginfo.msgssz)
315			msghdr->msg_ts -= msginfo.msgssz;
316		else
317			msghdr->msg_ts = 0;
318	}
319	if (msghdr->msg_spot != -1)
320		panic("msghdr->msg_spot != -1");
321	msghdr->msg_next = free_msghdrs;
322	free_msghdrs = msghdr;
323}
324
325#ifndef _SYS_SYSPROTO_H_
326struct msgctl_args {
327	int	msqid;
328	int	cmd;
329	struct	msqid_ds *buf;
330};
331#endif
332
333/*
334 * MPSAFE
335 */
336int
337msgctl(td, uap)
338	struct thread *td;
339	register struct msgctl_args *uap;
340{
341	int msqid = uap->msqid;
342	int cmd = uap->cmd;
343	struct msqid_ds *user_msqptr = uap->buf;
344	int rval, error;
345	struct msqid_ds msqbuf;
346	register struct msqid_ds *msqptr;
347
348#ifdef MSG_DEBUG_OK
349	printf("call to msgctl(%d, %d, 0x%x)\n", msqid, cmd, user_msqptr);
350#endif
351	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
352		return (ENOSYS);
353
354	mtx_lock(&Giant);
355	msqid = IPCID_TO_IX(msqid);
356
357	if (msqid < 0 || msqid >= msginfo.msgmni) {
358#ifdef MSG_DEBUG_OK
359		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
360		    msginfo.msgmni);
361#endif
362		error = EINVAL;
363		goto done2;
364	}
365
366	msqptr = &msqids[msqid];
367
368	if (msqptr->msg_qbytes == 0) {
369#ifdef MSG_DEBUG_OK
370		printf("no such msqid\n");
371#endif
372		error = EINVAL;
373		goto done2;
374	}
375	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
376#ifdef MSG_DEBUG_OK
377		printf("wrong sequence number\n");
378#endif
379		error = EINVAL;
380		goto done2;
381	}
382
383	error = 0;
384	rval = 0;
385
386	switch (cmd) {
387
388	case IPC_RMID:
389	{
390		struct msg *msghdr;
391		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
392			goto done2;
393		/* Free the message headers */
394		msghdr = msqptr->msg_first;
395		while (msghdr != NULL) {
396			struct msg *msghdr_tmp;
397
398			/* Free the segments of each message */
399			msqptr->msg_cbytes -= msghdr->msg_ts;
400			msqptr->msg_qnum--;
401			msghdr_tmp = msghdr;
402			msghdr = msghdr->msg_next;
403			msg_freehdr(msghdr_tmp);
404		}
405
406		if (msqptr->msg_cbytes != 0)
407			panic("msg_cbytes is screwed up");
408		if (msqptr->msg_qnum != 0)
409			panic("msg_qnum is screwed up");
410
411		msqptr->msg_qbytes = 0;	/* Mark it as free */
412
413		wakeup(msqptr);
414	}
415
416		break;
417
418	case IPC_SET:
419		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_M)))
420			goto done2;
421		if ((error = copyin(user_msqptr, &msqbuf, sizeof(msqbuf))) != 0)
422			goto done2;
423		if (msqbuf.msg_qbytes > msqptr->msg_qbytes) {
424			error = suser(td);
425			if (error)
426				goto done2;
427		}
428		if (msqbuf.msg_qbytes > msginfo.msgmnb) {
429#ifdef MSG_DEBUG_OK
430			printf("can't increase msg_qbytes beyond %d (truncating)\n",
431			    msginfo.msgmnb);
432#endif
433			msqbuf.msg_qbytes = msginfo.msgmnb;	/* silently restrict qbytes to system limit */
434		}
435		if (msqbuf.msg_qbytes == 0) {
436#ifdef MSG_DEBUG_OK
437			printf("can't reduce msg_qbytes to 0\n");
438#endif
439			error = EINVAL;		/* non-standard errno! */
440			goto done2;
441		}
442		msqptr->msg_perm.uid = msqbuf.msg_perm.uid;	/* change the owner */
443		msqptr->msg_perm.gid = msqbuf.msg_perm.gid;	/* change the owner */
444		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
445		    (msqbuf.msg_perm.mode & 0777);
446		msqptr->msg_qbytes = msqbuf.msg_qbytes;
447		msqptr->msg_ctime = time_second;
448		break;
449
450	case IPC_STAT:
451		if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
452#ifdef MSG_DEBUG_OK
453			printf("requester doesn't have read access\n");
454#endif
455			goto done2;
456		}
457		error = copyout(msqptr, user_msqptr, sizeof(struct msqid_ds));
458		break;
459
460	default:
461#ifdef MSG_DEBUG_OK
462		printf("invalid command %d\n", cmd);
463#endif
464		error = EINVAL;
465		goto done2;
466	}
467
468	if (error == 0)
469		td->td_retval[0] = rval;
470done2:
471	mtx_unlock(&Giant);
472	return(error);
473}
474
475#ifndef _SYS_SYSPROTO_H_
476struct msgget_args {
477	key_t	key;
478	int	msgflg;
479};
480#endif
481
482/*
483 * MPSAFE
484 */
485int
486msgget(td, uap)
487	struct thread *td;
488	register struct msgget_args *uap;
489{
490	int msqid, error = 0;
491	int key = uap->key;
492	int msgflg = uap->msgflg;
493	struct ucred *cred = td->td_ucred;
494	register struct msqid_ds *msqptr = NULL;
495
496#ifdef MSG_DEBUG_OK
497	printf("msgget(0x%x, 0%o)\n", key, msgflg);
498#endif
499
500	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
501		return (ENOSYS);
502
503	mtx_lock(&Giant);
504	if (key != IPC_PRIVATE) {
505		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
506			msqptr = &msqids[msqid];
507			if (msqptr->msg_qbytes != 0 &&
508			    msqptr->msg_perm.key == key)
509				break;
510		}
511		if (msqid < msginfo.msgmni) {
512#ifdef MSG_DEBUG_OK
513			printf("found public key\n");
514#endif
515			if ((msgflg & IPC_CREAT) && (msgflg & IPC_EXCL)) {
516#ifdef MSG_DEBUG_OK
517				printf("not exclusive\n");
518#endif
519				error = EEXIST;
520				goto done2;
521			}
522			if ((error = ipcperm(td, &msqptr->msg_perm, msgflg & 0700 ))) {
523#ifdef MSG_DEBUG_OK
524				printf("requester doesn't have 0%o access\n",
525				    msgflg & 0700);
526#endif
527				goto done2;
528			}
529			goto found;
530		}
531	}
532
533#ifdef MSG_DEBUG_OK
534	printf("need to allocate the msqid_ds\n");
535#endif
536	if (key == IPC_PRIVATE || (msgflg & IPC_CREAT)) {
537		for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
538			/*
539			 * Look for an unallocated and unlocked msqid_ds.
540			 * msqid_ds's can be locked by msgsnd or msgrcv while
541			 * they are copying the message in/out.  We can't
542			 * re-use the entry until they release it.
543			 */
544			msqptr = &msqids[msqid];
545			if (msqptr->msg_qbytes == 0 &&
546			    (msqptr->msg_perm.mode & MSG_LOCKED) == 0)
547				break;
548		}
549		if (msqid == msginfo.msgmni) {
550#ifdef MSG_DEBUG_OK
551			printf("no more msqid_ds's available\n");
552#endif
553			error = ENOSPC;
554			goto done2;
555		}
556#ifdef MSG_DEBUG_OK
557		printf("msqid %d is available\n", msqid);
558#endif
559		msqptr->msg_perm.key = key;
560		msqptr->msg_perm.cuid = cred->cr_uid;
561		msqptr->msg_perm.uid = cred->cr_uid;
562		msqptr->msg_perm.cgid = cred->cr_gid;
563		msqptr->msg_perm.gid = cred->cr_gid;
564		msqptr->msg_perm.mode = (msgflg & 0777);
565		/* Make sure that the returned msqid is unique */
566		msqptr->msg_perm.seq++;
567		msqptr->msg_first = NULL;
568		msqptr->msg_last = NULL;
569		msqptr->msg_cbytes = 0;
570		msqptr->msg_qnum = 0;
571		msqptr->msg_qbytes = msginfo.msgmnb;
572		msqptr->msg_lspid = 0;
573		msqptr->msg_lrpid = 0;
574		msqptr->msg_stime = 0;
575		msqptr->msg_rtime = 0;
576		msqptr->msg_ctime = time_second;
577	} else {
578#ifdef MSG_DEBUG_OK
579		printf("didn't find it and wasn't asked to create it\n");
580#endif
581		error = ENOENT;
582		goto done2;
583	}
584
585found:
586	/* Construct the unique msqid */
587	td->td_retval[0] = IXSEQ_TO_IPCID(msqid, msqptr->msg_perm);
588done2:
589	mtx_unlock(&Giant);
590	return (error);
591}
592
593#ifndef _SYS_SYSPROTO_H_
594struct msgsnd_args {
595	int	msqid;
596	void	*msgp;
597	size_t	msgsz;
598	int	msgflg;
599};
600#endif
601
602/*
603 * MPSAFE
604 */
605int
606msgsnd(td, uap)
607	struct thread *td;
608	register struct msgsnd_args *uap;
609{
610	int msqid = uap->msqid;
611	void *user_msgp = uap->msgp;
612	size_t msgsz = uap->msgsz;
613	int msgflg = uap->msgflg;
614	int segs_needed, error = 0;
615	register struct msqid_ds *msqptr;
616	register struct msg *msghdr;
617	short next;
618
619#ifdef MSG_DEBUG_OK
620	printf("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
621	    msgflg);
622#endif
623	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
624		return (ENOSYS);
625
626	mtx_lock(&Giant);
627	msqid = IPCID_TO_IX(msqid);
628
629	if (msqid < 0 || msqid >= msginfo.msgmni) {
630#ifdef MSG_DEBUG_OK
631		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
632		    msginfo.msgmni);
633#endif
634		error = EINVAL;
635		goto done2;
636	}
637
638	msqptr = &msqids[msqid];
639	if (msqptr->msg_qbytes == 0) {
640#ifdef MSG_DEBUG_OK
641		printf("no such message queue id\n");
642#endif
643		error = EINVAL;
644		goto done2;
645	}
646	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
647#ifdef MSG_DEBUG_OK
648		printf("wrong sequence number\n");
649#endif
650		error = EINVAL;
651		goto done2;
652	}
653
654	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_W))) {
655#ifdef MSG_DEBUG_OK
656		printf("requester doesn't have write access\n");
657#endif
658		goto done2;
659	}
660
661	segs_needed = (msgsz + msginfo.msgssz - 1) / msginfo.msgssz;
662#ifdef MSG_DEBUG_OK
663	printf("msgsz=%d, msgssz=%d, segs_needed=%d\n", msgsz, msginfo.msgssz,
664	    segs_needed);
665#endif
666	for (;;) {
667		int need_more_resources = 0;
668
669		/*
670		 * check msgsz
671		 * (inside this loop in case msg_qbytes changes while we sleep)
672		 */
673
674		if (msgsz > msqptr->msg_qbytes) {
675#ifdef MSG_DEBUG_OK
676			printf("msgsz > msqptr->msg_qbytes\n");
677#endif
678			error = EINVAL;
679			goto done2;
680		}
681
682		if (msqptr->msg_perm.mode & MSG_LOCKED) {
683#ifdef MSG_DEBUG_OK
684			printf("msqid is locked\n");
685#endif
686			need_more_resources = 1;
687		}
688		if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes) {
689#ifdef MSG_DEBUG_OK
690			printf("msgsz + msg_cbytes > msg_qbytes\n");
691#endif
692			need_more_resources = 1;
693		}
694		if (segs_needed > nfree_msgmaps) {
695#ifdef MSG_DEBUG_OK
696			printf("segs_needed > nfree_msgmaps\n");
697#endif
698			need_more_resources = 1;
699		}
700		if (free_msghdrs == NULL) {
701#ifdef MSG_DEBUG_OK
702			printf("no more msghdrs\n");
703#endif
704			need_more_resources = 1;
705		}
706
707		if (need_more_resources) {
708			int we_own_it;
709
710			if ((msgflg & IPC_NOWAIT) != 0) {
711#ifdef MSG_DEBUG_OK
712				printf("need more resources but caller doesn't want to wait\n");
713#endif
714				error = EAGAIN;
715				goto done2;
716			}
717
718			if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0) {
719#ifdef MSG_DEBUG_OK
720				printf("we don't own the msqid_ds\n");
721#endif
722				we_own_it = 0;
723			} else {
724				/* Force later arrivals to wait for our
725				   request */
726#ifdef MSG_DEBUG_OK
727				printf("we own the msqid_ds\n");
728#endif
729				msqptr->msg_perm.mode |= MSG_LOCKED;
730				we_own_it = 1;
731			}
732#ifdef MSG_DEBUG_OK
733			printf("goodnight\n");
734#endif
735			error = tsleep(msqptr, (PZERO - 4) | PCATCH,
736			    "msgwait", 0);
737#ifdef MSG_DEBUG_OK
738			printf("good morning, error=%d\n", error);
739#endif
740			if (we_own_it)
741				msqptr->msg_perm.mode &= ~MSG_LOCKED;
742			if (error != 0) {
743#ifdef MSG_DEBUG_OK
744				printf("msgsnd:  interrupted system call\n");
745#endif
746				error = EINTR;
747				goto done2;
748			}
749
750			/*
751			 * Make sure that the msq queue still exists
752			 */
753
754			if (msqptr->msg_qbytes == 0) {
755#ifdef MSG_DEBUG_OK
756				printf("msqid deleted\n");
757#endif
758				error = EIDRM;
759				goto done2;
760			}
761
762		} else {
763#ifdef MSG_DEBUG_OK
764			printf("got all the resources that we need\n");
765#endif
766			break;
767		}
768	}
769
770	/*
771	 * We have the resources that we need.
772	 * Make sure!
773	 */
774
775	if (msqptr->msg_perm.mode & MSG_LOCKED)
776		panic("msg_perm.mode & MSG_LOCKED");
777	if (segs_needed > nfree_msgmaps)
778		panic("segs_needed > nfree_msgmaps");
779	if (msgsz + msqptr->msg_cbytes > msqptr->msg_qbytes)
780		panic("msgsz + msg_cbytes > msg_qbytes");
781	if (free_msghdrs == NULL)
782		panic("no more msghdrs");
783
784	/*
785	 * Re-lock the msqid_ds in case we page-fault when copying in the
786	 * message
787	 */
788
789	if ((msqptr->msg_perm.mode & MSG_LOCKED) != 0)
790		panic("msqid_ds is already locked");
791	msqptr->msg_perm.mode |= MSG_LOCKED;
792
793	/*
794	 * Allocate a message header
795	 */
796
797	msghdr = free_msghdrs;
798	free_msghdrs = msghdr->msg_next;
799	msghdr->msg_spot = -1;
800	msghdr->msg_ts = msgsz;
801
802	/*
803	 * Allocate space for the message
804	 */
805
806	while (segs_needed > 0) {
807		if (nfree_msgmaps <= 0)
808			panic("not enough msgmaps");
809		if (free_msgmaps == -1)
810			panic("nil free_msgmaps");
811		next = free_msgmaps;
812		if (next <= -1)
813			panic("next too low #1");
814		if (next >= msginfo.msgseg)
815			panic("next out of range #1");
816#ifdef MSG_DEBUG_OK
817		printf("allocating segment %d to message\n", next);
818#endif
819		free_msgmaps = msgmaps[next].next;
820		nfree_msgmaps--;
821		msgmaps[next].next = msghdr->msg_spot;
822		msghdr->msg_spot = next;
823		segs_needed--;
824	}
825
826	/*
827	 * Copy in the message type
828	 */
829
830	if ((error = copyin(user_msgp, &msghdr->msg_type,
831	    sizeof(msghdr->msg_type))) != 0) {
832#ifdef MSG_DEBUG_OK
833		printf("error %d copying the message type\n", error);
834#endif
835		msg_freehdr(msghdr);
836		msqptr->msg_perm.mode &= ~MSG_LOCKED;
837		wakeup(msqptr);
838		goto done2;
839	}
840	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
841
842	/*
843	 * Validate the message type
844	 */
845
846	if (msghdr->msg_type < 1) {
847		msg_freehdr(msghdr);
848		msqptr->msg_perm.mode &= ~MSG_LOCKED;
849		wakeup(msqptr);
850#ifdef MSG_DEBUG_OK
851		printf("mtype (%d) < 1\n", msghdr->msg_type);
852#endif
853		error = EINVAL;
854		goto done2;
855	}
856
857	/*
858	 * Copy in the message body
859	 */
860
861	next = msghdr->msg_spot;
862	while (msgsz > 0) {
863		size_t tlen;
864		if (msgsz > msginfo.msgssz)
865			tlen = msginfo.msgssz;
866		else
867			tlen = msgsz;
868		if (next <= -1)
869			panic("next too low #2");
870		if (next >= msginfo.msgseg)
871			panic("next out of range #2");
872		if ((error = copyin(user_msgp, &msgpool[next * msginfo.msgssz],
873		    tlen)) != 0) {
874#ifdef MSG_DEBUG_OK
875			printf("error %d copying in message segment\n", error);
876#endif
877			msg_freehdr(msghdr);
878			msqptr->msg_perm.mode &= ~MSG_LOCKED;
879			wakeup(msqptr);
880			goto done2;
881		}
882		msgsz -= tlen;
883		user_msgp = (char *)user_msgp + tlen;
884		next = msgmaps[next].next;
885	}
886	if (next != -1)
887		panic("didn't use all the msg segments");
888
889	/*
890	 * We've got the message.  Unlock the msqid_ds.
891	 */
892
893	msqptr->msg_perm.mode &= ~MSG_LOCKED;
894
895	/*
896	 * Make sure that the msqid_ds is still allocated.
897	 */
898
899	if (msqptr->msg_qbytes == 0) {
900		msg_freehdr(msghdr);
901		wakeup(msqptr);
902		error = EIDRM;
903		goto done2;
904	}
905
906	/*
907	 * Put the message into the queue
908	 */
909
910	if (msqptr->msg_first == NULL) {
911		msqptr->msg_first = msghdr;
912		msqptr->msg_last = msghdr;
913	} else {
914		msqptr->msg_last->msg_next = msghdr;
915		msqptr->msg_last = msghdr;
916	}
917	msqptr->msg_last->msg_next = NULL;
918
919	msqptr->msg_cbytes += msghdr->msg_ts;
920	msqptr->msg_qnum++;
921	msqptr->msg_lspid = td->td_proc->p_pid;
922	msqptr->msg_stime = time_second;
923
924	wakeup(msqptr);
925	td->td_retval[0] = 0;
926done2:
927	mtx_unlock(&Giant);
928	return (error);
929}
930
931#ifndef _SYS_SYSPROTO_H_
932struct msgrcv_args {
933	int	msqid;
934	void	*msgp;
935	size_t	msgsz;
936	long	msgtyp;
937	int	msgflg;
938};
939#endif
940
941/*
942 * MPSAFE
943 */
944int
945msgrcv(td, uap)
946	struct thread *td;
947	register struct msgrcv_args *uap;
948{
949	int msqid = uap->msqid;
950	void *user_msgp = uap->msgp;
951	size_t msgsz = uap->msgsz;
952	long msgtyp = uap->msgtyp;
953	int msgflg = uap->msgflg;
954	size_t len;
955	register struct msqid_ds *msqptr;
956	register struct msg *msghdr;
957	int error = 0;
958	short next;
959
960#ifdef MSG_DEBUG_OK
961	printf("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
962	    msgsz, msgtyp, msgflg);
963#endif
964
965	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
966		return (ENOSYS);
967
968	mtx_lock(&Giant);
969	msqid = IPCID_TO_IX(msqid);
970
971	if (msqid < 0 || msqid >= msginfo.msgmni) {
972#ifdef MSG_DEBUG_OK
973		printf("msqid (%d) out of range (0<=msqid<%d)\n", msqid,
974		    msginfo.msgmni);
975#endif
976		error = EINVAL;
977		goto done2;
978	}
979
980	msqptr = &msqids[msqid];
981	if (msqptr->msg_qbytes == 0) {
982#ifdef MSG_DEBUG_OK
983		printf("no such message queue id\n");
984#endif
985		error = EINVAL;
986		goto done2;
987	}
988	if (msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
989#ifdef MSG_DEBUG_OK
990		printf("wrong sequence number\n");
991#endif
992		error = EINVAL;
993		goto done2;
994	}
995
996	if ((error = ipcperm(td, &msqptr->msg_perm, IPC_R))) {
997#ifdef MSG_DEBUG_OK
998		printf("requester doesn't have read access\n");
999#endif
1000		goto done2;
1001	}
1002
1003	msghdr = NULL;
1004	while (msghdr == NULL) {
1005		if (msgtyp == 0) {
1006			msghdr = msqptr->msg_first;
1007			if (msghdr != NULL) {
1008				if (msgsz < msghdr->msg_ts &&
1009				    (msgflg & MSG_NOERROR) == 0) {
1010#ifdef MSG_DEBUG_OK
1011					printf("first message on the queue is too big (want %d, got %d)\n",
1012					    msgsz, msghdr->msg_ts);
1013#endif
1014					error = E2BIG;
1015					goto done2;
1016				}
1017				if (msqptr->msg_first == msqptr->msg_last) {
1018					msqptr->msg_first = NULL;
1019					msqptr->msg_last = NULL;
1020				} else {
1021					msqptr->msg_first = msghdr->msg_next;
1022					if (msqptr->msg_first == NULL)
1023						panic("msg_first/last screwed up #1");
1024				}
1025			}
1026		} else {
1027			struct msg *previous;
1028			struct msg **prev;
1029
1030			previous = NULL;
1031			prev = &(msqptr->msg_first);
1032			while ((msghdr = *prev) != NULL) {
1033				/*
1034				 * Is this message's type an exact match or is
1035				 * this message's type less than or equal to
1036				 * the absolute value of a negative msgtyp?
1037				 * Note that the second half of this test can
1038				 * NEVER be true if msgtyp is positive since
1039				 * msg_type is always positive!
1040				 */
1041
1042				if (msgtyp == msghdr->msg_type ||
1043				    msghdr->msg_type <= -msgtyp) {
1044#ifdef MSG_DEBUG_OK
1045					printf("found message type %d, requested %d\n",
1046					    msghdr->msg_type, msgtyp);
1047#endif
1048					if (msgsz < msghdr->msg_ts &&
1049					    (msgflg & MSG_NOERROR) == 0) {
1050#ifdef MSG_DEBUG_OK
1051						printf("requested message on the queue is too big (want %d, got %d)\n",
1052						    msgsz, msghdr->msg_ts);
1053#endif
1054						error = E2BIG;
1055						goto done2;
1056					}
1057					*prev = msghdr->msg_next;
1058					if (msghdr == msqptr->msg_last) {
1059						if (previous == NULL) {
1060							if (prev !=
1061							    &msqptr->msg_first)
1062								panic("msg_first/last screwed up #2");
1063							msqptr->msg_first =
1064							    NULL;
1065							msqptr->msg_last =
1066							    NULL;
1067						} else {
1068							if (prev ==
1069							    &msqptr->msg_first)
1070								panic("msg_first/last screwed up #3");
1071							msqptr->msg_last =
1072							    previous;
1073						}
1074					}
1075					break;
1076				}
1077				previous = msghdr;
1078				prev = &(msghdr->msg_next);
1079			}
1080		}
1081
1082		/*
1083		 * We've either extracted the msghdr for the appropriate
1084		 * message or there isn't one.
1085		 * If there is one then bail out of this loop.
1086		 */
1087
1088		if (msghdr != NULL)
1089			break;
1090
1091		/*
1092		 * Hmph!  No message found.  Does the user want to wait?
1093		 */
1094
1095		if ((msgflg & IPC_NOWAIT) != 0) {
1096#ifdef MSG_DEBUG_OK
1097			printf("no appropriate message found (msgtyp=%d)\n",
1098			    msgtyp);
1099#endif
1100			/* The SVID says to return ENOMSG. */
1101			error = ENOMSG;
1102			goto done2;
1103		}
1104
1105		/*
1106		 * Wait for something to happen
1107		 */
1108
1109#ifdef MSG_DEBUG_OK
1110		printf("msgrcv:  goodnight\n");
1111#endif
1112		error = tsleep(msqptr, (PZERO - 4) | PCATCH, "msgwait", 0);
1113#ifdef MSG_DEBUG_OK
1114		printf("msgrcv:  good morning (error=%d)\n", error);
1115#endif
1116
1117		if (error != 0) {
1118#ifdef MSG_DEBUG_OK
1119			printf("msgsnd:  interrupted system call\n");
1120#endif
1121			error = EINTR;
1122			goto done2;
1123		}
1124
1125		/*
1126		 * Make sure that the msq queue still exists
1127		 */
1128
1129		if (msqptr->msg_qbytes == 0 ||
1130		    msqptr->msg_perm.seq != IPCID_TO_SEQ(uap->msqid)) {
1131#ifdef MSG_DEBUG_OK
1132			printf("msqid deleted\n");
1133#endif
1134			error = EIDRM;
1135			goto done2;
1136		}
1137	}
1138
1139	/*
1140	 * Return the message to the user.
1141	 *
1142	 * First, do the bookkeeping (before we risk being interrupted).
1143	 */
1144
1145	msqptr->msg_cbytes -= msghdr->msg_ts;
1146	msqptr->msg_qnum--;
1147	msqptr->msg_lrpid = td->td_proc->p_pid;
1148	msqptr->msg_rtime = time_second;
1149
1150	/*
1151	 * Make msgsz the actual amount that we'll be returning.
1152	 * Note that this effectively truncates the message if it is too long
1153	 * (since msgsz is never increased).
1154	 */
1155
1156#ifdef MSG_DEBUG_OK
1157	printf("found a message, msgsz=%d, msg_ts=%d\n", msgsz,
1158	    msghdr->msg_ts);
1159#endif
1160	if (msgsz > msghdr->msg_ts)
1161		msgsz = msghdr->msg_ts;
1162
1163	/*
1164	 * Return the type to the user.
1165	 */
1166
1167	error = copyout(&(msghdr->msg_type), user_msgp,
1168	    sizeof(msghdr->msg_type));
1169	if (error != 0) {
1170#ifdef MSG_DEBUG_OK
1171		printf("error (%d) copying out message type\n", error);
1172#endif
1173		msg_freehdr(msghdr);
1174		wakeup(msqptr);
1175		goto done2;
1176	}
1177	user_msgp = (char *)user_msgp + sizeof(msghdr->msg_type);
1178
1179	/*
1180	 * Return the segments to the user
1181	 */
1182
1183	next = msghdr->msg_spot;
1184	for (len = 0; len < msgsz; len += msginfo.msgssz) {
1185		size_t tlen;
1186
1187		if (msgsz - len > msginfo.msgssz)
1188			tlen = msginfo.msgssz;
1189		else
1190			tlen = msgsz - len;
1191		if (next <= -1)
1192			panic("next too low #3");
1193		if (next >= msginfo.msgseg)
1194			panic("next out of range #3");
1195		error = copyout(&msgpool[next * msginfo.msgssz],
1196		    user_msgp, tlen);
1197		if (error != 0) {
1198#ifdef MSG_DEBUG_OK
1199			printf("error (%d) copying out message segment\n",
1200			    error);
1201#endif
1202			msg_freehdr(msghdr);
1203			wakeup(msqptr);
1204			goto done2;
1205		}
1206		user_msgp = (char *)user_msgp + tlen;
1207		next = msgmaps[next].next;
1208	}
1209
1210	/*
1211	 * Done, return the actual number of bytes copied out.
1212	 */
1213
1214	msg_freehdr(msghdr);
1215	wakeup(msqptr);
1216	td->td_retval[0] = msgsz;
1217done2:
1218	mtx_unlock(&Giant);
1219	return (error);
1220}
1221
1222static int
1223sysctl_msqids(SYSCTL_HANDLER_ARGS)
1224{
1225
1226	return (SYSCTL_OUT(req, msqids,
1227	    sizeof(struct msqid_ds) * msginfo.msgmni));
1228}
1229
/*
 * Sysctl nodes under _kern_ipc, all flagged CTLFLAG_RD: one integer node
 * per msginfo field, plus "msqids", which is served by sysctl_msqids().
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmni, CTLFLAG_RD, &msginfo.msgmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmnb, CTLFLAG_RD, &msginfo.msgmnb, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgtql, CTLFLAG_RD, &msginfo.msgtql, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgssz, CTLFLAG_RD, &msginfo.msgssz, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RD, &msginfo.msgseg, 0, "");
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLFLAG_RD,
    NULL, 0, sysctl_msqids, "", "Message queue IDs");
1239