1/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
2/*-
3 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Adam Glass and Charles
16 *	Hannum.
17 * 4. The names of the authors may not be used to endorse or promote products
18 *    derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31/*-
32 * Copyright (c) 2003-2005 McAfee, Inc.
33 * All rights reserved.
34 *
35 * This software was developed for the FreeBSD Project in part by McAfee
36 * Research, the Security Research Division of McAfee, Inc under DARPA/SPAWAR
37 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS research
38 * program.
39 *
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
42 * are met:
43 * 1. Redistributions of source code must retain the above copyright
44 *    notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 *    notice, this list of conditions and the following disclaimer in the
47 *    documentation and/or other materials provided with the distribution.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 */
61
62#include <sys/cdefs.h>
63__FBSDID("$FreeBSD$");
64
65#include "opt_compat.h"
66#include "opt_sysvipc.h"
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/kernel.h>
71#include <sys/limits.h>
72#include <sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/shm.h>
75#include <sys/proc.h>
76#include <sys/malloc.h>
77#include <sys/mman.h>
78#include <sys/module.h>
79#include <sys/mutex.h>
80#include <sys/racct.h>
81#include <sys/resourcevar.h>
82#include <sys/stat.h>
83#include <sys/syscall.h>
84#include <sys/syscallsubr.h>
85#include <sys/sysent.h>
86#include <sys/sysproto.h>
87#include <sys/jail.h>
88
89#include <security/mac/mac_framework.h>
90
91#include <vm/vm.h>
92#include <vm/vm_param.h>
93#include <vm/pmap.h>
94#include <vm/vm_object.h>
95#include <vm/vm_map.h>
96#include <vm/vm_page.h>
97#include <vm/vm_pager.h>
98
99FEATURE(sysv_shm, "System V shared memory segments support");
100
101static MALLOC_DEFINE(M_SHM, "shm", "SVID compatible shared memory segments");
102
103static int shmget_allocate_segment(struct thread *td,
104    struct shmget_args *uap, int mode);
105static int shmget_existing(struct thread *td, struct shmget_args *uap,
106    int mode, int segnum);
107
/*
 * Segment state flags.  They are stored in u.shm_perm.mode of each
 * shmid_kernel slot, alongside the ACCESSPERMS permission bits.
 */
#define	SHMSEG_FREE     	0x0200	/* slot holds no segment */
#define	SHMSEG_REMOVED  	0x0400	/* set by IPC_RMID; also used as a
					   placeholder while a new segment is
					   being set up */
#define	SHMSEG_ALLOCATED	0x0800	/* slot holds a segment */
#define	SHMSEG_WANTED		0x1000	/* a thread sleeps on the slot and
					   must be woken with wakeup() */
112
/*
 * shm_last_free: index of a slot known to be free, or -1 when no hint is
 *                available.
 * shm_nused:     number of segments currently allocated.
 * shmalloced:    number of slots in the shmsegs[] array.
 */
static int shm_last_free, shm_nused, shmalloced;
/* Total size of all segments, accumulated in btoc() units. */
vm_size_t shm_committed;
/* Array of shmalloced segment descriptors. */
static struct shmid_kernel	*shmsegs;
116
/*
 * One attachment slot of a process.  Each vmspace that uses shared memory
 * owns an array of shminfo.shmseg of these, hung off vm_shm.
 */
struct shmmap_state {
	vm_offset_t va;		/* address the segment is mapped at */
	int shmid;		/* id of the attached segment, -1 if unused */
};
121
122static void shm_deallocate_segment(struct shmid_kernel *);
123static int shm_find_segment_by_key(key_t);
124static struct shmid_kernel *shm_find_segment_by_shmid(int);
125static struct shmid_kernel *shm_find_segment_by_shmidx(int);
126static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
127static void shmrealloc(void);
128static int shminit(void);
129static int sysvshm_modload(struct module *, int, void *);
130static int shmunload(void);
131static void shmexit_myhook(struct vmspace *vm);
132static void shmfork_myhook(struct proc *p1, struct proc *p2);
133static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
134
/*
 * Tunable values.  Each default below may be overridden at compile time.
 */
138#ifndef SHMMAXPGS
139#define	SHMMAXPGS	131072	/* Note: sysv shared memory is swap backed. */
140#endif
141#ifndef SHMMAX
142#define	SHMMAX	(SHMMAXPGS*PAGE_SIZE)
143#endif
144#ifndef SHMMIN
145#define	SHMMIN	1
146#endif
147#ifndef SHMMNI
148#define	SHMMNI	192
149#endif
150#ifndef SHMSEG
151#define	SHMSEG	128
152#endif
153#ifndef SHMALL
154#define	SHMALL	(SHMMAXPGS)
155#endif
156
/*
 * Run-time limits, seeded from the compile-time defaults above.  The
 * initializer is positional; the per-line field names below assume it
 * follows the declaration order of struct shminfo -- TODO confirm against
 * <sys/shm.h>.
 */
struct	shminfo shminfo = {
	SHMMAX,		/* shmmax */
	SHMMIN,		/* shmmin */
	SHMMNI,		/* shmmni */
	SHMSEG,		/* shmseg */
	SHMALL		/* shmall */
};
164
/* Non-zero: back new segments with OBJT_PHYS instead of OBJT_SWAP. */
static int shm_use_phys;
/* Non-zero: segment lookups also return segments marked SHMSEG_REMOVED. */
static int shm_allow_removed;

/*
 * kern.ipc.* sysctl nodes exporting the limits and switches above.
 * shmmni and shmseg are declared CTLFLAG_RDTUN; the others are CTLFLAG_RW.
 */
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0,
    "Maximum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0,
    "Minimum shared memory segment size");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RDTUN, &shminfo.shmmni, 0,
    "Number of shared memory identifiers");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RDTUN, &shminfo.shmseg, 0,
    "Number of segments per process");
SYSCTL_ULONG(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0,
    "Maximum number of pages available for shared memory");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW,
    &shm_use_phys, 0, "Enable/Disable locking of shared memory pages in core");
SYSCTL_INT(_kern_ipc, OID_AUTO, shm_allow_removed, CTLFLAG_RW,
    &shm_allow_removed, 0,
    "Enable/Disable attachment to attached segments marked for removal");
/* Handled by sysctl_shmsegs(), which copies out the whole shmsegs[] array. */
SYSCTL_PROC(_kern_ipc, OID_AUTO, shmsegs, CTLTYPE_OPAQUE | CTLFLAG_RD,
    NULL, 0, sysctl_shmsegs, "",
    "Current number of shared memory segments allocated");
186
187static int
188shm_find_segment_by_key(key)
189	key_t key;
190{
191	int i;
192
193	for (i = 0; i < shmalloced; i++)
194		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
195		    shmsegs[i].u.shm_perm.key == key)
196			return (i);
197	return (-1);
198}
199
200static struct shmid_kernel *
201shm_find_segment_by_shmid(int shmid)
202{
203	int segnum;
204	struct shmid_kernel *shmseg;
205
206	segnum = IPCID_TO_IX(shmid);
207	if (segnum < 0 || segnum >= shmalloced)
208		return (NULL);
209	shmseg = &shmsegs[segnum];
210	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
211	    (!shm_allow_removed &&
212	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
213	    shmseg->u.shm_perm.seq != IPCID_TO_SEQ(shmid))
214		return (NULL);
215	return (shmseg);
216}
217
218static struct shmid_kernel *
219shm_find_segment_by_shmidx(int segnum)
220{
221	struct shmid_kernel *shmseg;
222
223	if (segnum < 0 || segnum >= shmalloced)
224		return (NULL);
225	shmseg = &shmsegs[segnum];
226	if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
227	    (!shm_allow_removed &&
228	     (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0))
229		return (NULL);
230	return (shmseg);
231}
232
233static void
234shm_deallocate_segment(shmseg)
235	struct shmid_kernel *shmseg;
236{
237	vm_size_t size;
238
239	GIANT_REQUIRED;
240
241	vm_object_deallocate(shmseg->object);
242	shmseg->object = NULL;
243	size = round_page(shmseg->u.shm_segsz);
244	shm_committed -= btoc(size);
245	shm_nused--;
246	shmseg->u.shm_perm.mode = SHMSEG_FREE;
247#ifdef MAC
248	mac_sysvshm_cleanup(shmseg);
249#endif
250	racct_sub_cred(shmseg->cred, RACCT_NSHM, 1);
251	racct_sub_cred(shmseg->cred, RACCT_SHMSIZE, size);
252	crfree(shmseg->cred);
253	shmseg->cred = NULL;
254}
255
256static int
257shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
258{
259	struct shmid_kernel *shmseg;
260	int segnum, result;
261	vm_size_t size;
262
263	GIANT_REQUIRED;
264
265	segnum = IPCID_TO_IX(shmmap_s->shmid);
266	shmseg = &shmsegs[segnum];
267	size = round_page(shmseg->u.shm_segsz);
268	result = vm_map_remove(&vm->vm_map, shmmap_s->va, shmmap_s->va + size);
269	if (result != KERN_SUCCESS)
270		return (EINVAL);
271	shmmap_s->shmid = -1;
272	shmseg->u.shm_dtime = time_second;
273	if ((--shmseg->u.shm_nattch <= 0) &&
274	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
275		shm_deallocate_segment(shmseg);
276		shm_last_free = segnum;
277	}
278	return (0);
279}
280
281#ifndef _SYS_SYSPROTO_H_
282struct shmdt_args {
283	const void *shmaddr;
284};
285#endif
286int
287sys_shmdt(td, uap)
288	struct thread *td;
289	struct shmdt_args *uap;
290{
291	struct proc *p = td->td_proc;
292	struct shmmap_state *shmmap_s;
293#ifdef MAC
294	struct shmid_kernel *shmsegptr;
295#endif
296	int i;
297	int error = 0;
298
299	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
300		return (ENOSYS);
301	mtx_lock(&Giant);
302	shmmap_s = p->p_vmspace->vm_shm;
303 	if (shmmap_s == NULL) {
304		error = EINVAL;
305		goto done2;
306	}
307	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++) {
308		if (shmmap_s->shmid != -1 &&
309		    shmmap_s->va == (vm_offset_t)uap->shmaddr) {
310			break;
311		}
312	}
313	if (i == shminfo.shmseg) {
314		error = EINVAL;
315		goto done2;
316	}
317#ifdef MAC
318	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
319	error = mac_sysvshm_check_shmdt(td->td_ucred, shmsegptr);
320	if (error != 0)
321		goto done2;
322#endif
323	error = shm_delete_mapping(p->p_vmspace, shmmap_s);
324done2:
325	mtx_unlock(&Giant);
326	return (error);
327}
328
329#ifndef _SYS_SYSPROTO_H_
330struct shmat_args {
331	int shmid;
332	const void *shmaddr;
333	int shmflg;
334};
335#endif
336int
337kern_shmat(td, shmid, shmaddr, shmflg)
338	struct thread *td;
339	int shmid;
340	const void *shmaddr;
341	int shmflg;
342{
343	struct proc *p = td->td_proc;
344	int i, flags;
345	struct shmid_kernel *shmseg;
346	struct shmmap_state *shmmap_s = NULL;
347	vm_offset_t attach_va;
348	vm_prot_t prot;
349	vm_size_t size;
350	int rv;
351	int error = 0;
352
353	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
354		return (ENOSYS);
355	mtx_lock(&Giant);
356	shmmap_s = p->p_vmspace->vm_shm;
357	if (shmmap_s == NULL) {
358		shmmap_s = malloc(shminfo.shmseg * sizeof(struct shmmap_state),
359		    M_SHM, M_WAITOK);
360		for (i = 0; i < shminfo.shmseg; i++)
361			shmmap_s[i].shmid = -1;
362		p->p_vmspace->vm_shm = shmmap_s;
363	}
364	shmseg = shm_find_segment_by_shmid(shmid);
365	if (shmseg == NULL) {
366		error = EINVAL;
367		goto done2;
368	}
369	error = ipcperm(td, &shmseg->u.shm_perm,
370	    (shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
371	if (error)
372		goto done2;
373#ifdef MAC
374	error = mac_sysvshm_check_shmat(td->td_ucred, shmseg, shmflg);
375	if (error != 0)
376		goto done2;
377#endif
378	for (i = 0; i < shminfo.shmseg; i++) {
379		if (shmmap_s->shmid == -1)
380			break;
381		shmmap_s++;
382	}
383	if (i >= shminfo.shmseg) {
384		error = EMFILE;
385		goto done2;
386	}
387	size = round_page(shmseg->u.shm_segsz);
388	prot = VM_PROT_READ;
389	if ((shmflg & SHM_RDONLY) == 0)
390		prot |= VM_PROT_WRITE;
391	flags = MAP_ANON | MAP_SHARED;
392	if (shmaddr) {
393		flags |= MAP_FIXED;
394		if (shmflg & SHM_RND) {
395			attach_va = (vm_offset_t)shmaddr & ~(SHMLBA-1);
396		} else if (((vm_offset_t)shmaddr & (SHMLBA-1)) == 0) {
397			attach_va = (vm_offset_t)shmaddr;
398		} else {
399			error = EINVAL;
400			goto done2;
401		}
402	} else {
403		/*
404		 * This is just a hint to vm_map_find() about where to
405		 * put it.
406		 */
407		PROC_LOCK(p);
408		attach_va = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
409		    lim_max(p, RLIMIT_DATA));
410		PROC_UNLOCK(p);
411	}
412
413	vm_object_reference(shmseg->object);
414	rv = vm_map_find(&p->p_vmspace->vm_map, shmseg->object,
415	    0, &attach_va, size, (flags & MAP_FIXED) ? VMFS_NO_SPACE :
416	    VMFS_OPTIMAL_SPACE, prot, prot, MAP_INHERIT_SHARE);
417	if (rv != KERN_SUCCESS) {
418		vm_object_deallocate(shmseg->object);
419		error = ENOMEM;
420		goto done2;
421	}
422
423	shmmap_s->va = attach_va;
424	shmmap_s->shmid = shmid;
425	shmseg->u.shm_lpid = p->p_pid;
426	shmseg->u.shm_atime = time_second;
427	shmseg->u.shm_nattch++;
428	td->td_retval[0] = attach_va;
429done2:
430	mtx_unlock(&Giant);
431	return (error);
432}
433
434int
435sys_shmat(td, uap)
436	struct thread *td;
437	struct shmat_args *uap;
438{
439	return kern_shmat(td, uap->shmid, uap->shmaddr, uap->shmflg);
440}
441
442int
443kern_shmctl(td, shmid, cmd, buf, bufsz)
444	struct thread *td;
445	int shmid;
446	int cmd;
447	void *buf;
448	size_t *bufsz;
449{
450	int error = 0;
451	struct shmid_kernel *shmseg;
452
453	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
454		return (ENOSYS);
455
456	mtx_lock(&Giant);
457	switch (cmd) {
458	/*
459	 * It is possible that kern_shmctl is being called from the Linux ABI
460	 * layer, in which case, we will need to implement IPC_INFO.  It should
461	 * be noted that other shmctl calls will be funneled through here for
462	 * Linix binaries as well.
463	 *
464	 * NB: The Linux ABI layer will convert this data to structure(s) more
465	 * consistent with the Linux ABI.
466	 */
467	case IPC_INFO:
468		memcpy(buf, &shminfo, sizeof(shminfo));
469		if (bufsz)
470			*bufsz = sizeof(shminfo);
471		td->td_retval[0] = shmalloced;
472		goto done2;
473	case SHM_INFO: {
474		struct shm_info shm_info;
475		shm_info.used_ids = shm_nused;
476		shm_info.shm_rss = 0;	/*XXX where to get from ? */
477		shm_info.shm_tot = 0;	/*XXX where to get from ? */
478		shm_info.shm_swp = 0;	/*XXX where to get from ? */
479		shm_info.swap_attempts = 0;	/*XXX where to get from ? */
480		shm_info.swap_successes = 0;	/*XXX where to get from ? */
481		memcpy(buf, &shm_info, sizeof(shm_info));
482		if (bufsz)
483			*bufsz = sizeof(shm_info);
484		td->td_retval[0] = shmalloced;
485		goto done2;
486	}
487	}
488	if (cmd == SHM_STAT)
489		shmseg = shm_find_segment_by_shmidx(shmid);
490	else
491		shmseg = shm_find_segment_by_shmid(shmid);
492	if (shmseg == NULL) {
493		error = EINVAL;
494		goto done2;
495	}
496#ifdef MAC
497	error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, cmd);
498	if (error != 0)
499		goto done2;
500#endif
501	switch (cmd) {
502	case SHM_STAT:
503	case IPC_STAT:
504		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
505		if (error)
506			goto done2;
507		memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
508		if (bufsz)
509			*bufsz = sizeof(struct shmid_ds);
510		if (cmd == SHM_STAT)
511			td->td_retval[0] = IXSEQ_TO_IPCID(shmid, shmseg->u.shm_perm);
512		break;
513	case IPC_SET: {
514		struct shmid_ds *shmid;
515
516		shmid = (struct shmid_ds *)buf;
517		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
518		if (error)
519			goto done2;
520		shmseg->u.shm_perm.uid = shmid->shm_perm.uid;
521		shmseg->u.shm_perm.gid = shmid->shm_perm.gid;
522		shmseg->u.shm_perm.mode =
523		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
524		    (shmid->shm_perm.mode & ACCESSPERMS);
525		shmseg->u.shm_ctime = time_second;
526		break;
527	}
528	case IPC_RMID:
529		error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
530		if (error)
531			goto done2;
532		shmseg->u.shm_perm.key = IPC_PRIVATE;
533		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
534		if (shmseg->u.shm_nattch <= 0) {
535			shm_deallocate_segment(shmseg);
536			shm_last_free = IPCID_TO_IX(shmid);
537		}
538		break;
539#if 0
540	case SHM_LOCK:
541	case SHM_UNLOCK:
542#endif
543	default:
544		error = EINVAL;
545		break;
546	}
547done2:
548	mtx_unlock(&Giant);
549	return (error);
550}
551
552#ifndef _SYS_SYSPROTO_H_
553struct shmctl_args {
554	int shmid;
555	int cmd;
556	struct shmid_ds *buf;
557};
558#endif
559int
560sys_shmctl(td, uap)
561	struct thread *td;
562	struct shmctl_args *uap;
563{
564	int error = 0;
565	struct shmid_ds buf;
566	size_t bufsz;
567
568	/*
569	 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support
570	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
571	 * return an error back to the user since we do not to support this.
572	 */
573	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
574	    uap->cmd == SHM_STAT)
575		return (EINVAL);
576
577	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
578	if (uap->cmd == IPC_SET) {
579		if ((error = copyin(uap->buf, &buf, sizeof(struct shmid_ds))))
580			goto done;
581	}
582
583	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
584	if (error)
585		goto done;
586
587	/* Cases in which we need to copyout */
588	switch (uap->cmd) {
589	case IPC_STAT:
590		error = copyout(&buf, uap->buf, bufsz);
591		break;
592	}
593
594done:
595	if (error) {
596		/* Invalidate the return value */
597		td->td_retval[0] = -1;
598	}
599	return (error);
600}
601
602
603static int
604shmget_existing(td, uap, mode, segnum)
605	struct thread *td;
606	struct shmget_args *uap;
607	int mode;
608	int segnum;
609{
610	struct shmid_kernel *shmseg;
611	int error;
612
613	shmseg = &shmsegs[segnum];
614	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
615		/*
616		 * This segment is in the process of being allocated.  Wait
617		 * until it's done, and look the key up again (in case the
618		 * allocation failed or it was freed).
619		 */
620		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
621		error = tsleep(shmseg, PLOCK | PCATCH, "shmget", 0);
622		if (error)
623			return (error);
624		return (EAGAIN);
625	}
626	if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
627		return (EEXIST);
628#ifdef MAC
629	error = mac_sysvshm_check_shmget(td->td_ucred, shmseg, uap->shmflg);
630	if (error != 0)
631		return (error);
632#endif
633	if (uap->size != 0 && uap->size > shmseg->u.shm_segsz)
634		return (EINVAL);
635	td->td_retval[0] = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
636	return (0);
637}
638
639static int
640shmget_allocate_segment(td, uap, mode)
641	struct thread *td;
642	struct shmget_args *uap;
643	int mode;
644{
645	int i, segnum, shmid;
646	size_t size;
647	struct ucred *cred = td->td_ucred;
648	struct shmid_kernel *shmseg;
649	vm_object_t shm_object;
650
651	GIANT_REQUIRED;
652
653	if (uap->size < shminfo.shmmin || uap->size > shminfo.shmmax)
654		return (EINVAL);
655	if (shm_nused >= shminfo.shmmni) /* Any shmids left? */
656		return (ENOSPC);
657	size = round_page(uap->size);
658	if (shm_committed + btoc(size) > shminfo.shmall)
659		return (ENOMEM);
660	if (shm_last_free < 0) {
661		shmrealloc();	/* Maybe expand the shmsegs[] array. */
662		for (i = 0; i < shmalloced; i++)
663			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
664				break;
665		if (i == shmalloced)
666			return (ENOSPC);
667		segnum = i;
668	} else  {
669		segnum = shm_last_free;
670		shm_last_free = -1;
671	}
672	shmseg = &shmsegs[segnum];
673#ifdef RACCT
674	PROC_LOCK(td->td_proc);
675	if (racct_add(td->td_proc, RACCT_NSHM, 1)) {
676		PROC_UNLOCK(td->td_proc);
677		return (ENOSPC);
678	}
679	if (racct_add(td->td_proc, RACCT_SHMSIZE, size)) {
680		racct_sub(td->td_proc, RACCT_NSHM, 1);
681		PROC_UNLOCK(td->td_proc);
682		return (ENOMEM);
683	}
684	PROC_UNLOCK(td->td_proc);
685#endif
686	/*
687	 * In case we sleep in malloc(), mark the segment present but deleted
688	 * so that noone else tries to create the same key.
689	 */
690	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
691	shmseg->u.shm_perm.key = uap->key;
692	shmseg->u.shm_perm.seq = (shmseg->u.shm_perm.seq + 1) & 0x7fff;
693	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
694
695	/*
696	 * We make sure that we have allocated a pager before we need
697	 * to.
698	 */
699	shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP,
700	    0, size, VM_PROT_DEFAULT, 0, cred);
701	if (shm_object == NULL) {
702#ifdef RACCT
703		PROC_LOCK(td->td_proc);
704		racct_sub(td->td_proc, RACCT_NSHM, 1);
705		racct_sub(td->td_proc, RACCT_SHMSIZE, size);
706		PROC_UNLOCK(td->td_proc);
707#endif
708		return (ENOMEM);
709	}
710	VM_OBJECT_LOCK(shm_object);
711	vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
712	vm_object_set_flag(shm_object, OBJ_NOSPLIT);
713	VM_OBJECT_UNLOCK(shm_object);
714
715	shmseg->object = shm_object;
716	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = cred->cr_uid;
717	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
718	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
719	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
720	shmseg->cred = crhold(cred);
721	shmseg->u.shm_segsz = uap->size;
722	shmseg->u.shm_cpid = td->td_proc->p_pid;
723	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
724	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
725#ifdef MAC
726	mac_sysvshm_create(cred, shmseg);
727#endif
728	shmseg->u.shm_ctime = time_second;
729	shm_committed += btoc(size);
730	shm_nused++;
731	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
732		/*
733		 * Somebody else wanted this key while we were asleep.  Wake
734		 * them up now.
735		 */
736		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
737		wakeup(shmseg);
738	}
739	td->td_retval[0] = shmid;
740	return (0);
741}
742
743#ifndef _SYS_SYSPROTO_H_
744struct shmget_args {
745	key_t key;
746	size_t size;
747	int shmflg;
748};
749#endif
750int
751sys_shmget(td, uap)
752	struct thread *td;
753	struct shmget_args *uap;
754{
755	int segnum, mode;
756	int error;
757
758	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
759		return (ENOSYS);
760	mtx_lock(&Giant);
761	mode = uap->shmflg & ACCESSPERMS;
762	if (uap->key != IPC_PRIVATE) {
763	again:
764		segnum = shm_find_segment_by_key(uap->key);
765		if (segnum >= 0) {
766			error = shmget_existing(td, uap, mode, segnum);
767			if (error == EAGAIN)
768				goto again;
769			goto done2;
770		}
771		if ((uap->shmflg & IPC_CREAT) == 0) {
772			error = ENOENT;
773			goto done2;
774		}
775	}
776	error = shmget_allocate_segment(td, uap, mode);
777done2:
778	mtx_unlock(&Giant);
779	return (error);
780}
781
782static void
783shmfork_myhook(p1, p2)
784	struct proc *p1, *p2;
785{
786	struct shmmap_state *shmmap_s;
787	size_t size;
788	int i;
789
790	mtx_lock(&Giant);
791	size = shminfo.shmseg * sizeof(struct shmmap_state);
792	shmmap_s = malloc(size, M_SHM, M_WAITOK);
793	bcopy(p1->p_vmspace->vm_shm, shmmap_s, size);
794	p2->p_vmspace->vm_shm = shmmap_s;
795	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
796		if (shmmap_s->shmid != -1)
797			shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
798	mtx_unlock(&Giant);
799}
800
801static void
802shmexit_myhook(struct vmspace *vm)
803{
804	struct shmmap_state *base, *shm;
805	int i;
806
807	if ((base = vm->vm_shm) != NULL) {
808		vm->vm_shm = NULL;
809		mtx_lock(&Giant);
810		for (i = 0, shm = base; i < shminfo.shmseg; i++, shm++) {
811			if (shm->shmid != -1)
812				shm_delete_mapping(vm, shm);
813		}
814		mtx_unlock(&Giant);
815		free(base, M_SHM);
816	}
817}
818
819static void
820shmrealloc(void)
821{
822	int i;
823	struct shmid_kernel *newsegs;
824
825	if (shmalloced >= shminfo.shmmni)
826		return;
827
828	newsegs = malloc(shminfo.shmmni * sizeof(*newsegs), M_SHM, M_WAITOK);
829	if (newsegs == NULL)
830		return;
831	for (i = 0; i < shmalloced; i++)
832		bcopy(&shmsegs[i], &newsegs[i], sizeof(newsegs[0]));
833	for (; i < shminfo.shmmni; i++) {
834		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
835		shmsegs[i].u.shm_perm.seq = 0;
836#ifdef MAC
837		mac_sysvshm_init(&shmsegs[i]);
838#endif
839	}
840	free(shmsegs, M_SHM);
841	shmsegs = newsegs;
842	shmalloced = shminfo.shmmni;
843}
844
/*
 * Native system calls provided by this module.  The table is passed to
 * syscall_helper_register() in shminit() and to
 * syscall_helper_unregister() in shmunload().
 */
static struct syscall_helper_data shm_syscalls[] = {
	SYSCALL_INIT_HELPER(shmat),
	SYSCALL_INIT_HELPER(shmctl),
	SYSCALL_INIT_HELPER(shmdt),
	SYSCALL_INIT_HELPER(shmget),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL_INIT_HELPER_COMPAT(freebsd7_shmctl),
#endif
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
	SYSCALL_INIT_HELPER(shmsys),
#endif
	SYSCALL_INIT_LAST
};
859
860#ifdef COMPAT_FREEBSD32
861#include <compat/freebsd32/freebsd32.h>
862#include <compat/freebsd32/freebsd32_ipc.h>
863#include <compat/freebsd32/freebsd32_proto.h>
864#include <compat/freebsd32/freebsd32_signal.h>
865#include <compat/freebsd32/freebsd32_syscall.h>
866#include <compat/freebsd32/freebsd32_util.h>
867
/*
 * 32-bit compatibility system calls provided by this module.  The table is
 * passed to syscall32_helper_register() in shminit() and to
 * syscall32_helper_unregister() in shmunload().
 */
static struct syscall_helper_data shm32_syscalls[] = {
	SYSCALL32_INIT_HELPER_COMPAT(shmat),
	SYSCALL32_INIT_HELPER_COMPAT(shmdt),
	SYSCALL32_INIT_HELPER_COMPAT(shmget),
	SYSCALL32_INIT_HELPER(freebsd32_shmsys),
	SYSCALL32_INIT_HELPER(freebsd32_shmctl),
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
	SYSCALL32_INIT_HELPER(freebsd7_freebsd32_shmctl),
#endif
	SYSCALL_INIT_LAST
};
880#endif
881
882static int
883shminit()
884{
885	int i, error;
886
887#ifndef BURN_BRIDGES
888	if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
889		printf("kern.ipc.shmmaxpgs is now called kern.ipc.shmall!\n");
890#endif
891	TUNABLE_ULONG_FETCH("kern.ipc.shmall", &shminfo.shmall);
892
893	/* Initialize shmmax dealing with possible overflow. */
894	for (i = PAGE_SIZE; i > 0; i--) {
895		shminfo.shmmax = shminfo.shmall * i;
896		if (shminfo.shmmax >= shminfo.shmall)
897			break;
898	}
899
900	TUNABLE_ULONG_FETCH("kern.ipc.shmmin", &shminfo.shmmin);
901	TUNABLE_ULONG_FETCH("kern.ipc.shmmni", &shminfo.shmmni);
902	TUNABLE_ULONG_FETCH("kern.ipc.shmseg", &shminfo.shmseg);
903	TUNABLE_INT_FETCH("kern.ipc.shm_use_phys", &shm_use_phys);
904
905	shmalloced = shminfo.shmmni;
906	shmsegs = malloc(shmalloced * sizeof(shmsegs[0]), M_SHM, M_WAITOK);
907	for (i = 0; i < shmalloced; i++) {
908		shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
909		shmsegs[i].u.shm_perm.seq = 0;
910#ifdef MAC
911		mac_sysvshm_init(&shmsegs[i]);
912#endif
913	}
914	shm_last_free = 0;
915	shm_nused = 0;
916	shm_committed = 0;
917	shmexit_hook = &shmexit_myhook;
918	shmfork_hook = &shmfork_myhook;
919
920	error = syscall_helper_register(shm_syscalls);
921	if (error != 0)
922		return (error);
923#ifdef COMPAT_FREEBSD32
924	error = syscall32_helper_register(shm32_syscalls);
925	if (error != 0)
926		return (error);
927#endif
928	return (0);
929}
930
931static int
932shmunload()
933{
934	int i;
935
936	if (shm_nused > 0)
937		return (EBUSY);
938
939#ifdef COMPAT_FREEBSD32
940	syscall32_helper_unregister(shm32_syscalls);
941#endif
942	syscall_helper_unregister(shm_syscalls);
943
944	for (i = 0; i < shmalloced; i++) {
945#ifdef MAC
946		mac_sysvshm_destroy(&shmsegs[i]);
947#endif
948		/*
949		 * Objects might be still mapped into the processes
950		 * address spaces.  Actual free would happen on the
951		 * last mapping destruction.
952		 */
953		if (shmsegs[i].u.shm_perm.mode != SHMSEG_FREE)
954			vm_object_deallocate(shmsegs[i].object);
955	}
956	free(shmsegs, M_SHM);
957	shmexit_hook = NULL;
958	shmfork_hook = NULL;
959	return (0);
960}
961
962static int
963sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
964{
965
966	return (SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0])));
967}
968
969#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
/*
 * Old-format segment descriptor.  oshmctl() fills one of these from the
 * current shmid_ds and copies it out for IPC_STAT.
 */
struct oshmid_ds {
	struct	ipc_perm_old shm_perm;	/* operation perms */
	int	shm_segsz;		/* size of segment (bytes) */
	u_short	shm_cpid;		/* pid, creator */
	u_short	shm_lpid;		/* pid, last operation */
	short	shm_nattch;		/* no. of current attaches */
	time_t	shm_atime;		/* last attach time */
	time_t	shm_dtime;		/* last detach time */
	time_t	shm_ctime;		/* last change time */
	void	*shm_handle;		/* internal handle for shm segment */
};
981
/* Argument block consumed by oshmctl(). */
struct oshmctl_args {
	int shmid;			/* segment identifier */
	int cmd;			/* IPC_STAT, or passed to freebsd7_shmctl() */
	struct oshmid_ds *ubuf;		/* user buffer receiving IPC_STAT data */
};
987
988static int
989oshmctl(struct thread *td, struct oshmctl_args *uap)
990{
991#ifdef COMPAT_43
992	int error = 0;
993	struct shmid_kernel *shmseg;
994	struct oshmid_ds outbuf;
995
996	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
997		return (ENOSYS);
998	mtx_lock(&Giant);
999	shmseg = shm_find_segment_by_shmid(uap->shmid);
1000	if (shmseg == NULL) {
1001		error = EINVAL;
1002		goto done2;
1003	}
1004	switch (uap->cmd) {
1005	case IPC_STAT:
1006		error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
1007		if (error)
1008			goto done2;
1009#ifdef MAC
1010		error = mac_sysvshm_check_shmctl(td->td_ucred, shmseg, uap->cmd);
1011		if (error != 0)
1012			goto done2;
1013#endif
1014		ipcperm_new2old(&shmseg->u.shm_perm, &outbuf.shm_perm);
1015		outbuf.shm_segsz = shmseg->u.shm_segsz;
1016		outbuf.shm_cpid = shmseg->u.shm_cpid;
1017		outbuf.shm_lpid = shmseg->u.shm_lpid;
1018		outbuf.shm_nattch = shmseg->u.shm_nattch;
1019		outbuf.shm_atime = shmseg->u.shm_atime;
1020		outbuf.shm_dtime = shmseg->u.shm_dtime;
1021		outbuf.shm_ctime = shmseg->u.shm_ctime;
1022		outbuf.shm_handle = shmseg->object;
1023		error = copyout(&outbuf, uap->ubuf, sizeof(outbuf));
1024		if (error)
1025			goto done2;
1026		break;
1027	default:
1028		error = freebsd7_shmctl(td, (struct freebsd7_shmctl_args *)uap);
1029		break;
1030	}
1031done2:
1032	mtx_unlock(&Giant);
1033	return (error);
1034#else
1035	return (EINVAL);
1036#endif
1037}
1038
/*
 * Dispatch table for sys_shmsys(), indexed by its 'which' argument:
 * 0 = shmat, 1 = oshmctl, 2 = shmdt, 3 = shmget, 4 = freebsd7_shmctl.
 */
/* XXX casting to (sy_call_t *) is bogus, as usual. */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)sys_shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)sys_shmdt, (sy_call_t *)sys_shmget,
	(sy_call_t *)freebsd7_shmctl
};
1045
1046int
1047sys_shmsys(td, uap)
1048	struct thread *td;
1049	/* XXX actually varargs. */
1050	struct shmsys_args /* {
1051		int	which;
1052		int	a2;
1053		int	a3;
1054		int	a4;
1055	} */ *uap;
1056{
1057	int error;
1058
1059	if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
1060		return (ENOSYS);
1061	if (uap->which < 0 ||
1062	    uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
1063		return (EINVAL);
1064	mtx_lock(&Giant);
1065	error = (*shmcalls[uap->which])(td, &uap->a2);
1066	mtx_unlock(&Giant);
1067	return (error);
1068}
1069
1070#endif	/* i386 && (COMPAT_FREEBSD4 || COMPAT_43) */
1071
1072#ifdef COMPAT_FREEBSD32
1073
1074int
1075freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap)
1076{
1077
1078#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1079    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1080	switch (uap->which) {
1081	case 0:	{	/* shmat */
1082		struct shmat_args ap;
1083
1084		ap.shmid = uap->a2;
1085		ap.shmaddr = PTRIN(uap->a3);
1086		ap.shmflg = uap->a4;
1087		return (sysent[SYS_shmat].sy_call(td, &ap));
1088	}
1089	case 2: {	/* shmdt */
1090		struct shmdt_args ap;
1091
1092		ap.shmaddr = PTRIN(uap->a2);
1093		return (sysent[SYS_shmdt].sy_call(td, &ap));
1094	}
1095	case 3: {	/* shmget */
1096		struct shmget_args ap;
1097
1098		ap.key = uap->a2;
1099		ap.size = uap->a3;
1100		ap.shmflg = uap->a4;
1101		return (sysent[SYS_shmget].sy_call(td, &ap));
1102	}
1103	case 4: {	/* shmctl */
1104		struct freebsd7_freebsd32_shmctl_args ap;
1105
1106		ap.shmid = uap->a2;
1107		ap.cmd = uap->a3;
1108		ap.buf = PTRIN(uap->a4);
1109		return (freebsd7_freebsd32_shmctl(td, &ap));
1110	}
1111	case 1:		/* oshmctl */
1112	default:
1113		return (EINVAL);
1114	}
1115#else
1116	return (nosys(td, NULL));
1117#endif
1118}
1119
1120#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1121    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1122int
1123freebsd7_freebsd32_shmctl(struct thread *td,
1124    struct freebsd7_freebsd32_shmctl_args *uap)
1125{
1126	int error = 0;
1127	union {
1128		struct shmid_ds shmid_ds;
1129		struct shm_info shm_info;
1130		struct shminfo shminfo;
1131	} u;
1132	union {
1133		struct shmid_ds32_old shmid_ds32;
1134		struct shm_info32 shm_info32;
1135		struct shminfo32 shminfo32;
1136	} u32;
1137	size_t sz;
1138
1139	if (uap->cmd == IPC_SET) {
1140		if ((error = copyin(uap->buf, &u32.shmid_ds32,
1141		    sizeof(u32.shmid_ds32))))
1142			goto done;
1143		freebsd32_ipcperm_old_in(&u32.shmid_ds32.shm_perm,
1144		    &u.shmid_ds.shm_perm);
1145		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
1146		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
1147		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
1148		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
1149		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
1150		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
1151		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
1152	}
1153
1154	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
1155	if (error)
1156		goto done;
1157
1158	/* Cases in which we need to copyout */
1159	switch (uap->cmd) {
1160	case IPC_INFO:
1161		CP(u.shminfo, u32.shminfo32, shmmax);
1162		CP(u.shminfo, u32.shminfo32, shmmin);
1163		CP(u.shminfo, u32.shminfo32, shmmni);
1164		CP(u.shminfo, u32.shminfo32, shmseg);
1165		CP(u.shminfo, u32.shminfo32, shmall);
1166		error = copyout(&u32.shminfo32, uap->buf,
1167		    sizeof(u32.shminfo32));
1168		break;
1169	case SHM_INFO:
1170		CP(u.shm_info, u32.shm_info32, used_ids);
1171		CP(u.shm_info, u32.shm_info32, shm_rss);
1172		CP(u.shm_info, u32.shm_info32, shm_tot);
1173		CP(u.shm_info, u32.shm_info32, shm_swp);
1174		CP(u.shm_info, u32.shm_info32, swap_attempts);
1175		CP(u.shm_info, u32.shm_info32, swap_successes);
1176		error = copyout(&u32.shm_info32, uap->buf,
1177		    sizeof(u32.shm_info32));
1178		break;
1179	case SHM_STAT:
1180	case IPC_STAT:
1181		freebsd32_ipcperm_old_out(&u.shmid_ds.shm_perm,
1182		    &u32.shmid_ds32.shm_perm);
1183		if (u.shmid_ds.shm_segsz > INT32_MAX)
1184			u32.shmid_ds32.shm_segsz = INT32_MAX;
1185		else
1186			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
1187		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
1188		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
1189		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
1190		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
1191		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
1192		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
1193		u32.shmid_ds32.shm_internal = 0;
1194		error = copyout(&u32.shmid_ds32, uap->buf,
1195		    sizeof(u32.shmid_ds32));
1196		break;
1197	}
1198
1199done:
1200	if (error) {
1201		/* Invalidate the return value */
1202		td->td_retval[0] = -1;
1203	}
1204	return (error);
1205}
1206#endif
1207
1208int
1209freebsd32_shmctl(struct thread *td, struct freebsd32_shmctl_args *uap)
1210{
1211	int error = 0;
1212	union {
1213		struct shmid_ds shmid_ds;
1214		struct shm_info shm_info;
1215		struct shminfo shminfo;
1216	} u;
1217	union {
1218		struct shmid_ds32 shmid_ds32;
1219		struct shm_info32 shm_info32;
1220		struct shminfo32 shminfo32;
1221	} u32;
1222	size_t sz;
1223
1224	if (uap->cmd == IPC_SET) {
1225		if ((error = copyin(uap->buf, &u32.shmid_ds32,
1226		    sizeof(u32.shmid_ds32))))
1227			goto done;
1228		freebsd32_ipcperm_in(&u32.shmid_ds32.shm_perm,
1229		    &u.shmid_ds.shm_perm);
1230		CP(u32.shmid_ds32, u.shmid_ds, shm_segsz);
1231		CP(u32.shmid_ds32, u.shmid_ds, shm_lpid);
1232		CP(u32.shmid_ds32, u.shmid_ds, shm_cpid);
1233		CP(u32.shmid_ds32, u.shmid_ds, shm_nattch);
1234		CP(u32.shmid_ds32, u.shmid_ds, shm_atime);
1235		CP(u32.shmid_ds32, u.shmid_ds, shm_dtime);
1236		CP(u32.shmid_ds32, u.shmid_ds, shm_ctime);
1237	}
1238
1239	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&u, &sz);
1240	if (error)
1241		goto done;
1242
1243	/* Cases in which we need to copyout */
1244	switch (uap->cmd) {
1245	case IPC_INFO:
1246		CP(u.shminfo, u32.shminfo32, shmmax);
1247		CP(u.shminfo, u32.shminfo32, shmmin);
1248		CP(u.shminfo, u32.shminfo32, shmmni);
1249		CP(u.shminfo, u32.shminfo32, shmseg);
1250		CP(u.shminfo, u32.shminfo32, shmall);
1251		error = copyout(&u32.shminfo32, uap->buf,
1252		    sizeof(u32.shminfo32));
1253		break;
1254	case SHM_INFO:
1255		CP(u.shm_info, u32.shm_info32, used_ids);
1256		CP(u.shm_info, u32.shm_info32, shm_rss);
1257		CP(u.shm_info, u32.shm_info32, shm_tot);
1258		CP(u.shm_info, u32.shm_info32, shm_swp);
1259		CP(u.shm_info, u32.shm_info32, swap_attempts);
1260		CP(u.shm_info, u32.shm_info32, swap_successes);
1261		error = copyout(&u32.shm_info32, uap->buf,
1262		    sizeof(u32.shm_info32));
1263		break;
1264	case SHM_STAT:
1265	case IPC_STAT:
1266		freebsd32_ipcperm_out(&u.shmid_ds.shm_perm,
1267		    &u32.shmid_ds32.shm_perm);
1268		if (u.shmid_ds.shm_segsz > INT32_MAX)
1269			u32.shmid_ds32.shm_segsz = INT32_MAX;
1270		else
1271			CP(u.shmid_ds, u32.shmid_ds32, shm_segsz);
1272		CP(u.shmid_ds, u32.shmid_ds32, shm_lpid);
1273		CP(u.shmid_ds, u32.shmid_ds32, shm_cpid);
1274		CP(u.shmid_ds, u32.shmid_ds32, shm_nattch);
1275		CP(u.shmid_ds, u32.shmid_ds32, shm_atime);
1276		CP(u.shmid_ds, u32.shmid_ds32, shm_dtime);
1277		CP(u.shmid_ds, u32.shmid_ds32, shm_ctime);
1278		error = copyout(&u32.shmid_ds32, uap->buf,
1279		    sizeof(u32.shmid_ds32));
1280		break;
1281	}
1282
1283done:
1284	if (error) {
1285		/* Invalidate the return value */
1286		td->td_retval[0] = -1;
1287	}
1288	return (error);
1289}
1290#endif
1291
1292#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
1293    defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
1294
/*
 * CP(src, dst, fld): assign member "fld" of "src" to the member of the same
 * name in "dst".  Only defined here if no earlier definition is in effect.
 */
#if !defined(CP)
#define	CP(from, to, member)						\
	do {								\
		(to).member = (from).member;				\
	} while (0)
#endif
1298
#if !defined(_SYS_SYSPROTO_H_)
/*
 * Argument block taken by freebsd7_shmctl().  Declared locally only when
 * _SYS_SYSPROTO_H_ is not defined.
 */
struct freebsd7_shmctl_args {
	int			 shmid;	/* segment id given to kern_shmctl() */
	int			 cmd;	/* requested command (IPC_STAT, ...) */
	struct shmid_ds_old	*buf;	/* user buffer in the old layout */
};
#endif
1306int
1307freebsd7_shmctl(td, uap)
1308	struct thread *td;
1309	struct freebsd7_shmctl_args *uap;
1310{
1311	int error = 0;
1312	struct shmid_ds_old old;
1313	struct shmid_ds buf;
1314	size_t bufsz;
1315
1316	/*
1317	 * The only reason IPC_INFO, SHM_INFO, SHM_STAT exists is to support
1318	 * Linux binaries.  If we see the call come through the FreeBSD ABI,
1319	 * return an error back to the user since we do not to support this.
1320	 */
1321	if (uap->cmd == IPC_INFO || uap->cmd == SHM_INFO ||
1322	    uap->cmd == SHM_STAT)
1323		return (EINVAL);
1324
1325	/* IPC_SET needs to copyin the buffer before calling kern_shmctl */
1326	if (uap->cmd == IPC_SET) {
1327		if ((error = copyin(uap->buf, &old, sizeof(old))))
1328			goto done;
1329		ipcperm_old2new(&old.shm_perm, &buf.shm_perm);
1330		CP(old, buf, shm_segsz);
1331		CP(old, buf, shm_lpid);
1332		CP(old, buf, shm_cpid);
1333		CP(old, buf, shm_nattch);
1334		CP(old, buf, shm_atime);
1335		CP(old, buf, shm_dtime);
1336		CP(old, buf, shm_ctime);
1337	}
1338
1339	error = kern_shmctl(td, uap->shmid, uap->cmd, (void *)&buf, &bufsz);
1340	if (error)
1341		goto done;
1342
1343	/* Cases in which we need to copyout */
1344	switch (uap->cmd) {
1345	case IPC_STAT:
1346		ipcperm_new2old(&buf.shm_perm, &old.shm_perm);
1347		if (buf.shm_segsz > INT_MAX)
1348			old.shm_segsz = INT_MAX;
1349		else
1350			CP(buf, old, shm_segsz);
1351		CP(buf, old, shm_lpid);
1352		CP(buf, old, shm_cpid);
1353		if (buf.shm_nattch > SHRT_MAX)
1354			old.shm_nattch = SHRT_MAX;
1355		else
1356			CP(buf, old, shm_nattch);
1357		CP(buf, old, shm_atime);
1358		CP(buf, old, shm_dtime);
1359		CP(buf, old, shm_ctime);
1360		old.shm_internal = NULL;
1361		error = copyout(&old, uap->buf, sizeof(old));
1362		break;
1363	}
1364
1365done:
1366	if (error) {
1367		/* Invalidate the return value */
1368		td->td_retval[0] = -1;
1369	}
1370	return (error);
1371}
1372
1373#endif	/* COMPAT_FREEBSD4 || COMPAT_FREEBSD5 || COMPAT_FREEBSD6 ||
1374	   COMPAT_FREEBSD7 */
1375
1376static int
1377sysvshm_modload(struct module *module, int cmd, void *arg)
1378{
1379	int error = 0;
1380
1381	switch (cmd) {
1382	case MOD_LOAD:
1383		error = shminit();
1384		if (error != 0)
1385			shmunload();
1386		break;
1387	case MOD_UNLOAD:
1388		error = shmunload();
1389		break;
1390	case MOD_SHUTDOWN:
1391		break;
1392	default:
1393		error = EINVAL;
1394		break;
1395	}
1396	return (error);
1397}
1398
1399static moduledata_t sysvshm_mod = {
1400	"sysvshm",
1401	&sysvshm_modload,
1402	NULL
1403};
1404
1405DECLARE_MODULE(sysvshm, sysvshm_mod, SI_SUB_SYSV_SHM, SI_ORDER_FIRST);
1406MODULE_VERSION(sysvshm, 1);
1407