sysv_shm.c revision 220398
1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*-
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31/*-
32 * Copyright (c) 2003-2005 McAfee, Inc.
33 * All rights reserved.
34 *
35 * This software was developed for the FreeBSD Project in part by McAfee
36 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
37 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
38 * program.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD: head/sys/kern/sysv_shm.c 220398 2011-04-06 18:11:24Z trasz $");
64
65#include "opt_compat.h"
66#include "opt_sysvipc.h"
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/kernel.h>
71#include <sys/limits.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/shm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/mman.h>
78#include <sys/module.h>
79#include <sys/mutex.h>
80#include <sys/racct.h>
81#include <sys/resourcevar.h>
82#include <sys/stat.h>
83#include <sys/syscall.h>
84#include <sys/syscallsubr.h>
85#include <sys/sysent.h>
86#include <sys/sysproto.h>
87#include <sys/jail.h>
88
89#include <security/mac/mac_framework.h>
90
91#include <vm/vm.h>
92#include <vm/vm_param.h>
93#include <vm/pmap.h>
94#include <vm/vm_object.h>
95#include <vm/vm_map.h>
96#include <vm/vm_page.h>
97#include <vm/vm_pager.h>
98
99FEATURE(sysv_shm, "System V shared memory segments support");
100
101static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
102
103static int shmget_allocate_segment(struct thread *td,
104    struct shmget_args *uap, int mode);
105static int shmget_existing(struct thread *td, struct shmget_args *uap,
106    int mode, int segnum);
107
108#define	SHMSEG_FREE     	0x0200
109#define	SHMSEG_REMOVED  	0x0400
110#define	SHMSEG_ALLOCATED	0x0800
111#define	SHMSEG_WANTED		0x1000
112
113static int shm_last_free, shm_nused, shmalloced;
114vm_size_t shm_committed;
115static struct shmid_kernel	*shmsegs;
116
117struct shmmap_state {
118	vm_offset_t va;
119	int shmid;
120};
121
122static void shm_deallocate_segment(struct shmid_kernel *);
123static int shm_find_segment_by_key(key_t);
124static struct shmid_kernel *shm_find_segment_by_shmid(int);
125static struct shmid_kernel *shm_find_segment_by_shmidx(int);
126static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
127static void shmrealloc(void);
128static int shminit(void);
129static int sysvshm_modload(struct module *, int, void *);
130static int shmunload(void);
131static void shmexit_myhook(struct vmspace *vm);
132static void shmfork_myhook(struct proc *p1, struct proc *p2);
133static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
134
135/*
136 * Tuneable values.
137 */
138#ifndef SHMMAXPGS
139#define	SHMMAXPGS	131072	/* Note: sysv shared memory is swap backed. */
140#endif
141#ifndef SHMMAX
142#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
143#endif
144#ifndef SHMMIN
145#define	SHMMIN	1
146#endif
147#ifndef SHMMNI
148#define	SHMMNI	192
149#endif
150#ifndef SHMSEG
151#define	SHMSEG	128
152#endif
153#ifndef SHMALL
154#define	SHMALL	(SHMMAXPGS)
155#endif
156
157struct	shminfo shminfo = {
158	SHMMAX,
159	SHMMIN,
160	SHMMNI,
161	SHMSEG,
162	SHMALL
163};
164
165static int shm_use_phys;
166static int shm_allow_removed;
167
168SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
169    "Maximum shared memory segment size");
170SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
171    "Minimum shared memory segment size");
172SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
173    "Number of shared memory identifiers");
174SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
175    "Number of segments per process");
176SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
177    "Maximum number of pages available for shared memory");
178SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
179    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
180SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
181    &shm_allow_removed, 0,
182    "Enable/Disable attachment to attached segments marked for removal");
183SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD,
184    NULL, 0, sysctl_shmsegs, "",
185    "Current number of shared memory segments allocated");
186
187static int
188shm_find_segment_by_key(key)
189	key_t key;
190{
191	int i;
192
193	for (i = 0; i < shmalloced; i++)
194		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
195		    shmsegs[i].u.shm_perm.key == key)
196			return (i);
197	return (-1);
198}
199
200static struct shmid_kernel *
201shm_find_segment_by_shmid(int shmid)
202{
203	int segnum;
204	struct shmid_kernel *shmseg;
205
206	segnum = IPCID_TO_IX(shmid);
207	if (segnum < 0 || segnum >= shmalloced)
208		return (NULL);
209	shmseg = &shmsegs[segnum];
210	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
211	    (!shm_allow_removed &&
212	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
213	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
214		return (NULL);
215	return (shmseg);
216}
217
218static struct shmid_kernel *
219shm_find_segment_by_shmidx(int segnum)
220{
221	struct shmid_kernel *shmseg;
222
223	if (segnum < 0 || segnum >= shmalloced)
224		return (NULL);
225	shmseg = &shmsegs[segnum];
226	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
227	    (!shm_allow_removed &&
228	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
229		return (NULL);
230	return (shmseg);
231}
232
233static void
234shm_deallocate_segment(shmseg)
235	struct shmid_kernel *shmseg;
236{
237	vm_size_t size;
238
239	GIANT_REQUIRED;
240
241	vm_object_deallocate(shmseg->object);
242	shmseg->object = NULL;
243	size = round_page(shmseg->u.shm_segsz);
244	shm_committed -= btoc(size);
245	shm_nused--;
246	shmseg->u.shm_perm.mode = SHMSEG_FREE;
247#ifdef MAC
248	mac_sysvshm_cleanup(shmseg);
249#endif
250	racct_sub_cred(shmseg->cred, RACCT_NSHM, 1);
251	racct_sub_cred(shmseg->cred, RACCT_SHMSIZE, size);
252	crfree(shmseg->cred);
253	shmseg->cred = NULL;
254}
255
256static int
257shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
258{
259	struct shmid_kernel *shmseg;
260	int segnum, result;
261	vm_size_t size;
262
263	GIANT_REQUIRED;
264
265	segnum = IPCID_TO_IX(shmmap_s->shmid);
266	shmseg = &shmsegs[segnum];
267	size = round_page(shmseg->u.shm_segsz);
268	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
269	if (result != KERN_SUCCESS)
270		return (EINVAL);
271	shmmap_s->shmid = -1;
272	shmseg->u.shm_dtime = time_second;
273	if ((--shmseg->u.shm_nattch <= 0) &&
274	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
275		shm_deallocate_segment(shmseg);
276		shm_last_free = segnum;
277	}
278	return (0);
279}
280
281#ifndef _SYS_SYSPROTO_H_
282struct shmdt_args {
283	const void *shmaddr;
284};
285#endif
286int
287shmdt(td, uap)
288	struct thread *td;
289	struct shmdt_args *uap;
290{
291	struct proc *p = td->td_proc;
292	struct shmmap_state *shmmap_s;
293#ifdef MAC
294	struct shmid_kernel *shmsegptr;
295#endif
296	int i;
297	int error = 0;
298
299	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
300		return (ENOSYS);
301	mtx_lock(&Giant);
302	shmmap_s = p->p_vmspace->vm_shm;
303 	if (shmmap_s == NULL) {
304		error = EINVAL;
305		goto done2;
306	}
307	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
308		if (shmmap_s->shmid != -1 &&
309		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
310			break;
311		}
312	}
313	if (i == shminfo.shmseg) {
314		error = EINVAL;
315		goto done2;
316	}
317#ifdef MAC
318	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
319	error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr);
320	if (error != 0)
321		goto done2;
322#endif
323	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
324done2:
325	mtx_unlock(&Giant);
326	return (error);
327}
328
329#ifndef _SYS_SYSPROTO_H_
330struct shmat_args {
331	int shmid;
332	const void *shmaddr;
333	int shmflg;
334};
335#endif
336int
337kern_shmat(td, shmid, shmaddr, shmflg)
338	struct thread *td;
339	int shmid;
340	const void *shmaddr;
341	int shmflg;
342{
343	struct proc *p = td->td_proc;
344	int i, flags;
345	struct shmid_kernel *shmseg;
346	struct shmmap_state *shmmap_s = NULL;
347	vm_offset_t attach_va;
348	vm_prot_t prot;
349	vm_size_t size;
350	int rv;
351	int error = 0;
352
353	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
354		return (ENOSYS);
355	mtx_lock(&Giant);
356	shmmap_s = p->p_vmspace->vm_shm;
357	if (shmmap_s == NULL) {
358		shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state),
359		    M_SHM, M_WAITOK);
360		for (i = 0; i < shminfo.shmseg; i++)
361			shmmap_s[i].shmid = -1;
362		p->p_vmspace->vm_shm = shmmap_s;
363	}
364	shmseg = shm_find_segment_by_shmid(shmid);
365	if (shmseg == NULL) {
366		error = EINVAL;
367		goto done2;
368	}
369	error = ipcperm(td, &shmseg->u.shm_perm,
370	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
371	if (error)
372		goto done2;
373#ifdef MAC
374	error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg);
375	if (error != 0)
376		goto done2;
377#endif
378	for (i = 0; i < shminfo.shmseg; i++) {
379		if (shmmap_s->shmid == -1)
380			break;
381		shmmap_s++;
382	}
383	if (i >= shminfo.shmseg) {
384		error = EMFILE;
385		goto done2;
386	}
387	size = round_page(shmseg->u.shm_segsz);
388	prot = VM_PROT_READ;
389	if ((shmflg & SHM_RDONLY) == 0)
390		prot |= VM_PROT_WRITE;
391	flags = MAP_ANON | MAP_SHARED;
392	if (shmaddr) {
393		flags |= MAP_FIXED;
394		if (shmflg & SHM_RND) {
395			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
396		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
397			attach_va = (vm_offset_t)shmaddr;
398		} else {
399			error = EINVAL;
400			goto done2;
401		}
402	} else {
403		/*
404		 * This is just a hint to vm_map_find() about where to
405		 * put it.
406		 */
407		PROC_LOCK(p);
408		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
409		    lim_max(p, RLIMIT_DATA));
410		PROC_UNLOCK(p);
411	}
412
413	vm_object_reference(shmseg->object);
414	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object,
415	    0, &attach_va, size, (flags & MAP_FIXED) ? VMFS_NO_SPACE :
416	    VMFS_ANY_SPACE, prot, prot, 0);
417	if (rv != KERN_SUCCESS) {
418		vm_object_deallocate(shmseg->object);
419		error = ENOMEM;
420		goto done2;
421	}
422	vm_map_inherit(&p->p_vmspace->vm_map,
423		attach_va, attach_va + size, VM_INHERIT_SHARE);
424
425	shmmap_s->va = attach_va;
426	shmmap_s->shmid = shmid;
427	shmseg->u.shm_lpid = p->p_pid;
428	shmseg->u.shm_atime = time_second;
429	shmseg->u.shm_nattch++;
430	td->td_retval[0] = attach_va;
431done2:
432	mtx_unlock(&Giant);
433	return (error);
434}
435
436int
437shmat(td, uap)
438	struct thread *td;
439	struct shmat_args *uap;
440{
441	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
442}
443
444int
445kern_shmctl(td, shmid, cmd, buf, bufsz)
446	struct thread *td;
447	int shmid;
448	int cmd;
449	void *buf;
450	size_t *bufsz;
451{
452	int error = 0;
453	struct shmid_kernel *shmseg;
454
455	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
456		return (ENOSYS);
457
458	mtx_lock(&Giant);
459	switch (cmd) {
460	/*
461	 * It is possible that kern_shmctl is being called from the Linux ABI
462	 * layer, in which case, we will need to implement IPC_INFO.  It should
463	 * be noted that other shmctl calls will be funneled through here for
464	 * Linix binaries as well.
465	 *
466	 * NB: The Linux ABI layer will convert this data to structure(s) more
467	 * consistent with the Linux ABI.
468	 */
469	case IPC_INFO:
470		memcpy(buf, &shminfo, sizeof(shminfo));
471		if (bufsz)
472			*bufsz = sizeof(shminfo);
473		td->td_retval[0] = shmalloced;
474		goto done2;
475	case SHM_INFO: {
476		struct shm_info shm_info;
477		shm_info.used_ids = shm_nused;
478		shm_info.shm_rss = 0;	/*XXX where to get from ? */
479		shm_info.shm_tot = 0;	/*XXX where to get from ? */
480		shm_info.shm_swp = 0;	/*XXX where to get from ? */
481		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
482		shm_info.swap_successes = 0;	/*XXX where to get from ? */
483		memcpy(buf, &shm_info, sizeof(shm_info));
484		if (bufsz)
485			*bufsz = sizeof(shm_info);
486		td->td_retval[0] = shmalloced;
487		goto done2;
488	}
489	}
490	if (cmd == SHM_STAT)
491		shmseg = shm_find_segment_by_shmidx(shmid);
492	else
493		shmseg = shm_find_segment_by_shmid(shmid);
494	if (shmseg == NULL) {
495		error = EINVAL;
496		goto done2;
497	}
498#ifdef MAC
499	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
500	if (error != 0)
501		goto done2;
502#endif
503	switch (cmd) {
504	case SHM_STAT:
505	case IPC_STAT:
506		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
507		if (error)
508			goto done2;
509		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
510		if (bufsz)
511			*bufsz = sizeof(struct shmid_ds);
512		if (cmd == SHM_STAT)
513			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
514		break;
515	case IPC_SET: {
516		struct shmid_ds *shmid;
517
518		shmid = (struct shmid_ds *)buf;
519		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
520		if (error)
521			goto done2;
522		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
523		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
524		shmseg->u.shm_perm.mode =
525		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
526		    (shmid->shm_perm.mode & ACCESSPERMS);
527		shmseg->u.shm_ctime = time_second;
528		break;
529	}
530	case IPC_RMID:
531		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
532		if (error)
533			goto done2;
534		shmseg->u.shm_perm.key = IPC_PRIVATE;
535		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
536		if (shmseg->u.shm_nattch <= 0) {
537			shm_deallocate_segment(shmseg);
538			shm_last_free = IPCID_TO_IX(shmid);
539		}
540		break;
541#if 0
542	case SHM_LOCK:
543	case SHM_UNLOCK:
544#endif
545	default:
546		error = EINVAL;
547		break;
548	}
549done2:
550	mtx_unlock(&Giant);
551	return (error);
552}
553
554#ifndef _SYS_SYSPROTO_H_
555struct shmctl_args {
556	int shmid;
557	int cmd;
558	struct shmid_ds *buf;
559};
560#endif
561int
562shmctl(td, uap)
563	struct thread *td;
564	struct shmctl_args *uap;
565{
566	int error = 0;
567	struct shmid_ds buf;
568	size_t bufsz;
569
570	/*
571	 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support
572	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
573	 * return an error back to the user since we do not to support this.
574	 */
575	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
576	    uap->cmd == SHM_STAT)
577		return (EINVAL);
578
579	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
580	if (uap->cmd == IPC_SET) {
581		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
582			goto done;
583	}
584
585	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
586	if (error)
587		goto done;
588
589	/* Cases in which we need to copyout */
590	switch (uap->cmd) {
591	case IPC_STAT:
592		error = copyout(&buf, uap->buf, bufsz);
593		break;
594	}
595
596done:
597	if (error) {
598		/* Invalidate the return value */
599		td->td_retval[0] = -1;
600	}
601	return (error);
602}
603
604
605static int
606shmget_existing(td, uap, mode, segnum)
607	struct thread *td;
608	struct shmget_args *uap;
609	int mode;
610	int segnum;
611{
612	struct shmid_kernel *shmseg;
613	int error;
614
615	shmseg = &shmsegs[segnum];
616	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
617		/*
618		 * This segment is in the process of being allocated.  Wait
619		 * until it's done, and look the key up again (in case the
620		 * allocation failed or it was freed).
621		 */
622		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
623		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
624		if (error)
625			return (error);
626		return (EAGAIN);
627	}
628	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
629		return (EEXIST);
630#ifdef MAC
631	error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg);
632	if (error != 0)
633		return (error);
634#endif
635	if (uap->size != 0 && uap->size > shmseg->u.shm_segsz)
636		return (EINVAL);
637	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
638	return (0);
639}
640
641static int
642shmget_allocate_segment(td, uap, mode)
643	struct thread *td;
644	struct shmget_args *uap;
645	int mode;
646{
647	int i, segnum, shmid;
648	size_t size;
649	struct ucred *cred = td->td_ucred;
650	struct shmid_kernel *shmseg;
651	vm_object_t shm_object;
652
653	GIANT_REQUIRED;
654
655	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
656		return (EINVAL);
657	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
658		return (ENOSPC);
659	size = round_page(uap->size);
660	if (shm_committed + btoc(size) > shminfo.shmall)
661		return (ENOMEM);
662	if (shm_last_free < 0) {
663		shmrealloc();	/* Maybe expand the shmsegs[] array. */
664		for (i = 0; i < shmalloced; i++)
665			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
666				break;
667		if (i == shmalloced)
668			return (ENOSPC);
669		segnum = i;
670	} else  {
671		segnum = shm_last_free;
672		shm_last_free = -1;
673	}
674	shmseg = &shmsegs[segnum];
675	PROC_LOCK(td->td_proc);
676	if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
677		PROC_UNLOCK(td->td_proc);
678		return (ENOSPC);
679	}
680	if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
681		racct_sub(td->td_proc, RACCT_NSHM, 1);
682		PROC_UNLOCK(td->td_proc);
683		return (ENOMEM);
684	}
685	PROC_UNLOCK(td->td_proc);
686	/*
687	 * In case we sleep in malloc(), mark the segment present but deleted
688	 * so that noone else tries to create the same key.
689	 */
690	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
691	shmseg->u.shm_perm.key = uap->key;
692	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
693	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
694
695	/*
696	 * We make sure that we have allocated a pager before we need
697	 * to.
698	 */
699	shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP,
700	    0, size, VM_PROT_DEFAULT, 0, cred);
701	if (shm_object == NULL) {
702		PROC_LOCK(td->td_proc);
703		racct_sub(td->td_proc, RACCT_NSHM, 1);
704		racct_sub(td->td_proc, RACCT_SHMSIZE, size);
705		PROC_UNLOCK(td->td_proc);
706		return (ENOMEM);
707	}
708	VM_OBJECT_LOCK(shm_object);
709	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
710	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
711	VM_OBJECT_UNLOCK(shm_object);
712
713	shmseg->object = shm_object;
714	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
715	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
716	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
717	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
718	crhold(cred);
719	shmseg->cred = cred;
720	shmseg->u.shm_segsz = uap->size;
721	shmseg->u.shm_cpid = td->td_proc->p_pid;
722	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
723	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
724#ifdef MAC
725	mac_sysvshm_create(cred, shmseg);
726#endif
727	shmseg->u.shm_ctime = time_second;
728	shm_committed += btoc(size);
729	shm_nused++;
730	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
731		/*
732		 * Somebody else wanted this key while we were asleep.  Wake
733		 * them up now.
734		 */
735		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
736		wakeup(shmseg);
737	}
738	td->td_retval[0] = shmid;
739	return (0);
740}
741
742#ifndef _SYS_SYSPROTO_H_
743struct shmget_args {
744	key_t key;
745	size_t size;
746	int shmflg;
747};
748#endif
749int
750shmget(td, uap)
751	struct thread *td;
752	struct shmget_args *uap;
753{
754	int segnum, mode;
755	int error;
756
757	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
758		return (ENOSYS);
759	mtx_lock(&Giant);
760	mode = uap->shmflg & ACCESSPERMS;
761	if (uap->key != IPC_PRIVATE) {
762	again:
763		segnum = shm_find_segment_by_key(uap->key);
764		if (segnum >= 0) {
765			error = shmget_existing(td, uap, mode, segnum);
766			if (error == EAGAIN)
767				goto again;
768			goto done2;
769		}
770		if ((uap->shmflg & IPC_CREAT) == 0) {
771			error = ENOENT;
772			goto done2;
773		}
774	}
775	error = shmget_allocate_segment(td, uap, mode);
776done2:
777	mtx_unlock(&Giant);
778	return (error);
779}
780
781static void
782shmfork_myhook(p1, p2)
783	struct proc *p1, *p2;
784{
785	struct shmmap_state *shmmap_s;
786	size_t size;
787	int i;
788
789	mtx_lock(&Giant);
790	size = shminfo.shmseg * sizeof(struct shmmap_state);
791	shmmap_s = malloc(size, M_SHM, M_WAITOK);
792	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
793	p2->p_vmspace->vm_shm = shmmap_s;
794	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
795		if (shmmap_s->shmid != -1)
796			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
797	mtx_unlock(&Giant);
798}
799
800static void
801shmexit_myhook(struct vmspace *vm)
802{
803	struct shmmap_state *base, *shm;
804	int i;
805
806	if ((base = vm->vm_shm) != NULL) {
807		vm->vm_shm = NULL;
808		mtx_lock(&Giant);
809		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
810			if (shm->shmid != -1)
811				shm_delete_mapping(vm, shm);
812		}
813		mtx_unlock(&Giant);
814		free(base, M_SHM);
815	}
816}
817
818static void
819shmrealloc(void)
820{
821	int i;
822	struct shmid_kernel *newsegs;
823
824	if (shmalloced >= shminfo.shmmni)
825		return;
826
827	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
828	if (newsegs == NULL)
829		return;
830	for (i = 0; i < shmalloced; i++)
831		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
832	for (; i < shminfo.shmmni; i++) {
833		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
834		shmsegs[i].u.shm_perm.seq = 0;
835#ifdef MAC
836		mac_sysvshm_init(&shmsegs[i]);
837#endif
838	}
839	free(shmsegs, M_SHM);
840	shmsegs = newsegs;
841	shmalloced = shminfo.shmmni;
842}
843
844static struct syscall_helper_data shm_syscalls[] = {
845	SYSCALL_INIT_HELPER(shmat),
846	SYSCALL_INIT_HELPER(shmctl),
847	SYSCALL_INIT_HELPER(shmdt),
848	SYSCALL_INIT_HELPER(shmget),
849#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
850    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
851	SYSCALL_INIT_HELPER(freebsd7_shmctl),
852#endif
853#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
854	SYSCALL_INIT_HELPER(shmsys),
855#endif
856	SYSCALL_INIT_LAST
857};
858
859#ifdef COMPAT_FREEBSD32
860#include <compat/freebsd32/freebsd32.h>
861#include <compat/freebsd32/freebsd32_ipc.h>
862#include <compat/freebsd32/freebsd32_proto.h>
863#include <compat/freebsd32/freebsd32_signal.h>
864#include <compat/freebsd32/freebsd32_syscall.h>
865#include <compat/freebsd32/freebsd32_util.h>
866
867static struct syscall_helper_data shm32_syscalls[] = {
868	SYSCALL32_INIT_HELPER(shmat),
869	SYSCALL32_INIT_HELPER(shmdt),
870	SYSCALL32_INIT_HELPER(shmget),
871	SYSCALL32_INIT_HELPER(freebsd32_shmsys),
872	SYSCALL32_INIT_HELPER(freebsd32_shmctl),
873#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
874    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
875	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_shmctl),
876#endif
877	SYSCALL_INIT_LAST
878};
879#endif
880
881static int
882shminit()
883{
884	int i, error;
885
886#ifndef BURN_BRIDGES
887	if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
888		printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n");
889#endif
890	TUNABLE_ULONG_FETCH("kern.ipc.shmall", &shminfo.shmall);
891
892	/* Initialize shmmax dealing with possible overflow. */
893	for (i = PAGE_SIZE; i > 0; i--) {
894		shminfo.shmmax = shminfo.shmall * i;
895		if (shminfo.shmmax >= shminfo.shmall)
896			break;
897	}
898
899	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
900	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
901	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
902	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
903
904	shmalloced = shminfo.shmmni;
905	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
906	for (i = 0; i < shmalloced; i++) {
907		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
908		shmsegs[i].u.shm_perm.seq = 0;
909#ifdef MAC
910		mac_sysvshm_init(&shmsegs[i]);
911#endif
912	}
913	shm_last_free = 0;
914	shm_nused = 0;
915	shm_committed = 0;
916	shmexit_hook = &shmexit_myhook;
917	shmfork_hook = &shmfork_myhook;
918
919	error = syscall_helper_register(shm_syscalls);
920	if (error != 0)
921		return (error);
922#ifdef COMPAT_FREEBSD32
923	error = syscall32_helper_register(shm32_syscalls);
924	if (error != 0)
925		return (error);
926#endif
927	return (0);
928}
929
930static int
931shmunload()
932{
933	int i;
934
935	if (shm_nused > 0)
936		return (EBUSY);
937
938#ifdef COMPAT_FREEBSD32
939	syscall32_helper_unregister(shm32_syscalls);
940#endif
941	syscall_helper_unregister(shm_syscalls);
942
943	for (i = 0; i < shmalloced; i++) {
944#ifdef MAC
945		mac_sysvshm_destroy(&shmsegs[i]);
946#endif
947		/*
948		 * Objects might be still mapped into the processes
949		 * address spaces.  Actual free would happen on the
950		 * last mapping destruction.
951		 */
952		if (shmsegs[i].u.shm_perm.mode != SHMSEG_FREE)
953			vm_object_deallocate(shmsegs[i].object);
954	}
955	free(shmsegs, M_SHM);
956	shmexit_hook = NULL;
957	shmfork_hook = NULL;
958	return (0);
959}
960
961static int
962sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
963{
964
965	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
966}
967
968#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
969struct oshmid_ds {
970	struct	ipc_perm_old shm_perm;	/* operation perms */
971	int	shm_segsz;		/* size of segment (bytes) */
972	u_short	shm_cpid;		/* pid, creator */
973	u_short	shm_lpid;		/* pid, last operation */
974	short	shm_nattch;		/* no. of current attaches */
975	time_t	shm_atime;		/* last attach time */
976	time_t	shm_dtime;		/* last detach time */
977	time_t	shm_ctime;		/* last change time */
978	void	*shm_handle;		/* internal handle for shm segment */
979};
980
981struct oshmctl_args {
982	int shmid;
983	int cmd;
984	struct oshmid_ds *ubuf;
985};
986
987static int
988oshmctl(struct thread *td, struct oshmctl_args *uap)
989{
990#ifdef COMPAT_43
991	int error = 0;
992	struct shmid_kernel *shmseg;
993	struct oshmid_ds outbuf;
994
995	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
996		return (ENOSYS);
997	mtx_lock(&Giant);
998	shmseg = shm_find_segment_by_shmid(uap->shmid);
999	if (shmseg == NULL) {
1000		error = EINVAL;
1001		goto done2;
1002	}
1003	switch (uap->cmd) {
1004	case IPC_STAT:
1005		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
1006		if (error)
1007			goto done2;
1008#ifdef MAC
1009		error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
1010		if (error != 0)
1011			goto done2;
1012#endif
1013		ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
1014		outbuf.shm_segsz = shmseg->u.shm_segsz;
1015		outbuf.shm_cpid = shmseg->u.shm_cpid;
1016		outbuf.shm_lpid = shmseg->u.shm_lpid;
1017		outbuf.shm_nattch = shmseg->u.shm_nattch;
1018		outbuf.shm_atime = shmseg->u.shm_atime;
1019		outbuf.shm_dtime = shmseg->u.shm_dtime;
1020		outbuf.shm_ctime = shmseg->u.shm_ctime;
1021		outbuf.shm_handle = shmseg->object;
1022		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
1023		if (error)
1024			goto done2;
1025		break;
1026	default:
1027		error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap);
1028		break;
1029	}
1030done2:
1031	mtx_unlock(&Giant);
1032	return (error);
1033#else
1034	return (EINVAL);
1035#endif
1036}
1037
1038/* XXX casting to (sy_call_t *) is bogus, as usual. */
1039static sy_call_t *shmcalls[] = {
1040	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
1041	(sy_call_t *)shmdt, (sy_call_t *)shmget,
1042	(sy_call_t *)freebsd7_shmctl
1043};
1044
1045int
1046shmsys(td, uap)
1047	struct thread *td;
1048	/* XXX actually varargs. */
1049	struct shmsys_args /* {
1050		int	which;
1051		int	a2;
1052		int	a3;
1053		int	a4;
1054	} */ *uap;
1055{
1056	int error;
1057
1058	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1059		return (ENOSYS);
1060	if (uap->which < 0 ||
1061	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
1062		return (EINVAL);
1063	mtx_lock(&Giant);
1064	error = (*shmcalls[uap->which])(td, &uap->a2);
1065	mtx_unlock(&Giant);
1066	return (error);
1067}
1068
1069#endif	/* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */
1070
1071#ifdef COMPAT_FREEBSD32
1072
/*
 * 32-bit compatibility version of the multiplexed shmsys() call.
 *
 * uap->which picks the operation; a2..a4 are widened into the native
 * argument structure of that operation and the native system call (or
 * freebsd7_freebsd32_shmctl() for shmctl) is invoked.  Selector 1
 * (oshmctl) and any unknown selector yield EINVAL.  When none of the
 * COMPAT_FREEBSD4..7 options is configured the call is passed to nosys().
 */
int
freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap)
{
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	int error;

	switch (uap->which) {
	case 0: {	/* shmat */
		struct shmat_args at_args;

		at_args.shmid = uap->a2;
		at_args.shmaddr = PTRIN(uap->a3);
		at_args.shmflg = uap->a4;
		error = sysent[SYS_shmat].sy_call(td, &at_args);
		break;
	}
	case 2: {	/* shmdt */
		struct shmdt_args dt_args;

		dt_args.shmaddr = PTRIN(uap->a2);
		error = sysent[SYS_shmdt].sy_call(td, &dt_args);
		break;
	}
	case 3: {	/* shmget */
		struct shmget_args get_args;

		get_args.key = uap->a2;
		get_args.size = uap->a3;
		get_args.shmflg = uap->a4;
		error = sysent[SYS_shmget].sy_call(td, &get_args);
		break;
	}
	case 4: {	/* shmctl */
		struct freebsd7_freebsd32_shmctl_args ctl_args;

		ctl_args.shmid = uap->a2;
		ctl_args.cmd = uap->a3;
		ctl_args.buf = PTRIN(uap->a4);
		error = freebsd7_freebsd32_shmctl(td, &ctl_args);
		break;
	}
	case 1:		/* oshmctl */
	default:
		error = EINVAL;
		break;
	}
	return (error);
#else
	return (nosys(td, NULL));
#endif
}
1118
1119#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1120    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1121int
1122freebsd7_freebsd32_shmctl(struct thread *td,
1123    struct freebsd7_freebsd32_shmctl_args *uap)
1124{
1125	int error = 0;
1126	union {
1127		struct shmid_ds shmid_ds;
1128		struct shm_info shm_info;
1129		struct shminfo shminfo;
1130	} u;
1131	union {
1132		struct shmid_ds32_old shmid_ds32;
1133		struct shm_info32 shm_info32;
1134		struct shminfo32 shminfo32;
1135	} u32;
1136	size_t sz;
1137
1138	if (uap->cmd == IPC_SET) {
1139		if ((error = copyin(uap->buf, &u32.shmid_ds32,
1140		    sizeof(u32.shmid_ds32))))
1141			goto done;
1142		freebsd32_ipcperm_old_in(&u32.shmid_ds32.shm_perm,
1143		    &u.shmid_ds.shm_perm);
1144		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
1145		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
1146		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
1147		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
1148		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
1149		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
1150		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
1151	}
1152
1153	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
1154	if (error)
1155		goto done;
1156
1157	/* Cases in which we need to copyout */
1158	switch (uap->cmd) {
1159	case IPC_INFO:
1160		CP(u.shminfo, u32.shminfo32, shmmax);
1161		CP(u.shminfo, u32.shminfo32, shmmin);
1162		CP(u.shminfo, u32.shminfo32, shmmni);
1163		CP(u.shminfo, u32.shminfo32, shmseg);
1164		CP(u.shminfo, u32.shminfo32, shmall);
1165		error = copyout(&u32.shminfo32, uap->buf,
1166		    sizeof(u32.shminfo32));
1167		break;
1168	case SHM_INFO:
1169		CP(u.shm_info, u32.shm_info32, used_ids);
1170		CP(u.shm_info, u32.shm_info32, shm_rss);
1171		CP(u.shm_info, u32.shm_info32, shm_tot);
1172		CP(u.shm_info, u32.shm_info32, shm_swp);
1173		CP(u.shm_info, u32.shm_info32, swap_attempts);
1174		CP(u.shm_info, u32.shm_info32, swap_successes);
1175		error = copyout(&u32.shm_info32, uap->buf,
1176		    sizeof(u32.shm_info32));
1177		break;
1178	case SHM_STAT:
1179	case IPC_STAT:
1180		freebsd32_ipcperm_old_out(&u.shmid_ds.shm_perm,
1181		    &u32.shmid_ds32.shm_perm);
1182		if (u.shmid_ds.shm_segsz > INT32_MAX)
1183			u32.shmid_ds32.shm_segsz = INT32_MAX;
1184		else
1185			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
1186		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
1187		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
1188		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
1189		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
1190		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
1191		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
1192		u32.shmid_ds32.shm_internal = 0;
1193		error = copyout(&u32.shmid_ds32, uap->buf,
1194		    sizeof(u32.shmid_ds32));
1195		break;
1196	}
1197
1198done:
1199	if (error) {
1200		/* Invalidate the return value */
1201		td->td_retval[0] = -1;
1202	}
1203	return (error);
1204}
1205#endif
1206
1207int
1208freebsd32_shmctl(struct thread *td, struct freebsd32_shmctl_args *uap)
1209{
1210	int error = 0;
1211	union {
1212		struct shmid_ds shmid_ds;
1213		struct shm_info shm_info;
1214		struct shminfo shminfo;
1215	} u;
1216	union {
1217		struct shmid_ds32 shmid_ds32;
1218		struct shm_info32 shm_info32;
1219		struct shminfo32 shminfo32;
1220	} u32;
1221	size_t sz;
1222
1223	if (uap->cmd == IPC_SET) {
1224		if ((error = copyin(uap->buf, &u32.shmid_ds32,
1225		    sizeof(u32.shmid_ds32))))
1226			goto done;
1227		freebsd32_ipcperm_in(&u32.shmid_ds32.shm_perm,
1228		    &u.shmid_ds.shm_perm);
1229		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
1230		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
1231		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
1232		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
1233		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
1234		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
1235		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
1236	}
1237
1238	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
1239	if (error)
1240		goto done;
1241
1242	/* Cases in which we need to copyout */
1243	switch (uap->cmd) {
1244	case IPC_INFO:
1245		CP(u.shminfo, u32.shminfo32, shmmax);
1246		CP(u.shminfo, u32.shminfo32, shmmin);
1247		CP(u.shminfo, u32.shminfo32, shmmni);
1248		CP(u.shminfo, u32.shminfo32, shmseg);
1249		CP(u.shminfo, u32.shminfo32, shmall);
1250		error = copyout(&u32.shminfo32, uap->buf,
1251		    sizeof(u32.shminfo32));
1252		break;
1253	case SHM_INFO:
1254		CP(u.shm_info, u32.shm_info32, used_ids);
1255		CP(u.shm_info, u32.shm_info32, shm_rss);
1256		CP(u.shm_info, u32.shm_info32, shm_tot);
1257		CP(u.shm_info, u32.shm_info32, shm_swp);
1258		CP(u.shm_info, u32.shm_info32, swap_attempts);
1259		CP(u.shm_info, u32.shm_info32, swap_successes);
1260		error = copyout(&u32.shm_info32, uap->buf,
1261		    sizeof(u32.shm_info32));
1262		break;
1263	case SHM_STAT:
1264	case IPC_STAT:
1265		freebsd32_ipcperm_out(&u.shmid_ds.shm_perm,
1266		    &u32.shmid_ds32.shm_perm);
1267		if (u.shmid_ds.shm_segsz > INT32_MAX)
1268			u32.shmid_ds32.shm_segsz = INT32_MAX;
1269		else
1270			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
1271		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
1272		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
1273		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
1274		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
1275		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
1276		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
1277		error = copyout(&u32.shmid_ds32, uap->buf,
1278		    sizeof(u32.shmid_ds32));
1279		break;
1280	}
1281
1282done:
1283	if (error) {
1284		/* Invalidate the return value */
1285		td->td_retval[0] = -1;
1286	}
1287	return (error);
1288}
1289#endif
1290
1291#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1292    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1293
#ifndef CP
/*
 * Assign member "fld" of aggregate "src" to the same-named member of
 * aggregate "dst".  Wrapped in do/while (0) so it behaves as a single
 * statement.
 */
#define CP(src, dst, fld)						\
	do {								\
		(dst).fld = (src).fld;					\
	} while (0)
#endif
1297
1298#ifndef _SYS_SYSPROTO_H_
1299struct freebsd7_shmctl_args {
1300	int shmid;
1301	int cmd;
1302	struct shmid_ds_old *buf;
1303};
1304#endif
1305int
1306freebsd7_shmctl(td, uap)
1307	struct thread *td;
1308	struct freebsd7_shmctl_args *uap;
1309{
1310	int error = 0;
1311	struct shmid_ds_old old;
1312	struct shmid_ds buf;
1313	size_t bufsz;
1314
1315	/*
1316	 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support
1317	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
1318	 * return an error back to the user since we do not to support this.
1319	 */
1320	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
1321	    uap->cmd == SHM_STAT)
1322		return (EINVAL);
1323
1324	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
1325	if (uap->cmd == IPC_SET) {
1326		if ((error = copyin(uap->buf, &old, sizeof(old))))
1327			goto done;
1328		ipcperm_old2new(&old.shm_perm, &buf.shm_perm);
1329		CP(old, buf, shm_segsz);
1330		CP(old, buf, shm_lpid);
1331		CP(old, buf, shm_cpid);
1332		CP(old, buf, shm_nattch);
1333		CP(old, buf, shm_atime);
1334		CP(old, buf, shm_dtime);
1335		CP(old, buf, shm_ctime);
1336	}
1337
1338	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
1339	if (error)
1340		goto done;
1341
1342	/* Cases in which we need to copyout */
1343	switch (uap->cmd) {
1344	case IPC_STAT:
1345		ipcperm_new2old(&buf.shm_perm, &old.shm_perm);
1346		if (buf.shm_segsz > INT_MAX)
1347			old.shm_segsz = INT_MAX;
1348		else
1349			CP(buf, old, shm_segsz);
1350		CP(buf, old, shm_lpid);
1351		CP(buf, old, shm_cpid);
1352		if (buf.shm_nattch > SHRT_MAX)
1353			old.shm_nattch = SHRT_MAX;
1354		else
1355			CP(buf, old, shm_nattch);
1356		CP(buf, old, shm_atime);
1357		CP(buf, old, shm_dtime);
1358		CP(buf, old, shm_ctime);
1359		old.shm_internal = NULL;
1360		error = copyout(&old, uap->buf, sizeof(old));
1361		break;
1362	}
1363
1364done:
1365	if (error) {
1366		/* Invalidate the return value */
1367		td->td_retval[0] = -1;
1368	}
1369	return (error);
1370}
1371
1372#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1373	   COMPAT_FREEBSD7 */
1374
1375static int
1376sysvshm_modload(struct module *module, int cmd, void *arg)
1377{
1378	int error = 0;
1379
1380	switch (cmd) {
1381	case MOD_LOAD:
1382		error = shminit();
1383		if (error != 0)
1384			shmunload();
1385		break;
1386	case MOD_UNLOAD:
1387		error = shmunload();
1388		break;
1389	case MOD_SHUTDOWN:
1390		break;
1391	default:
1392		error = EINVAL;
1393		break;
1394	}
1395	return (error);
1396}
1397
1398static moduledata_t sysvshm_mod = {
1399	"sysvshm",
1400	&sysvshm_modload,
1401	NULL
1402};
1403
1404DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
1405MODULE_VERSION(sysvshm, 1);
1406