1/*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*	$NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $	*/
29
30/*
31 * Copyright (c) 1994 Adam Glass and Charles Hannum.  All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 *    notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 *    notice, this list of conditions and the following disclaimer in the
40 *    documentation and/or other materials provided with the distribution.
41 * 3. All advertising materials mentioning features or use of this software
42 *    must display the following acknowledgement:
43 *	This product includes software developed by Adam Glass and Charles
44 *	Hannum.
45 * 4. The names of the authors may not be used to endorse or promote products
46 *    derived from this software without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
49 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
50 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
52 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
53 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
54 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
55 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
57 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 */
59/*
60 * NOTICE: This file was modified by McAfee Research in 2004 to introduce
61 * support for mandatory and extensible security protections.  This notice
62 * is included in support of clause 2.2 (b) of the Apple Public License,
63 * Version 2.0.
64 * Copyright (c) 2005-2006 SPARTA, Inc.
65*/
66
67
68#include <sys/appleapiopts.h>
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/kernel.h>
72#include <sys/shm_internal.h>
73#include <sys/proc_internal.h>
74#include <sys/kauth.h>
75#include <sys/malloc.h>
76#include <sys/mman.h>
77#include <sys/stat.h>
78#include <sys/sysctl.h>
79#include <sys/ipcs.h>
80#include <sys/sysent.h>
81#include <sys/sysproto.h>
82#if CONFIG_MACF
83#include <security/mac_framework.h>
84#endif
85
86#include <security/audit/audit.h>
87
88#include <mach/mach_types.h>
89#include <mach/vm_inherit.h>
90#include <mach/vm_map.h>
91
92#include <mach/mach_vm.h>
93
94#include <vm/vm_map.h>
95#include <vm/vm_protos.h>
96
97#include <kern/locks.h>
98
/*
 * MAC debugging output.
 *
 * MPRINTF(a) expands to "printf a" when CONFIG_MACF_DEBUG is non-zero and to
 * nothing otherwise, so callers pass a fully parenthesized printf argument
 * list, e.g. MPRINTF(("fmt %d\n", x)).
 *
 * NOTE(review): the MAC_DEBUG define below is commented out and is not what
 * the #if tests; the switch that matters here is CONFIG_MACF_DEBUG.
 */
/* #define MAC_DEBUG */
#if CONFIG_MACF_DEBUG
#define	MPRINTF(a)	printf a
#else
#define	MPRINTF(a)
#endif
106
107#if SYSV_SHM
/*
 * shminit() is not registered through SYSINIT (the registration below is
 * compiled out); the entry points in this file call it lazily, under the
 * subsystem lock, the first time they see shm_inited == 0.
 */
static void shminit(void *);
#if 0
SYSINIT(sysv_shm, SI_SUB_SYSV_SHM, SI_ORDER_FIRST, shminit, NULL)
#endif

/*
 * Lock group, attributes and the single mutex that serializes the SysV
 * shared memory subsystem; all are set up by sysv_shm_lock_init().
 */
static lck_grp_t       *sysv_shm_subsys_lck_grp;
static lck_grp_attr_t  *sysv_shm_subsys_lck_grp_attr;
static lck_attr_t      *sysv_shm_subsys_lck_attr;
static lck_mtx_t        sysv_shm_subsys_mutex;

/* Take / drop the subsystem mutex. */
#define SYSV_SHM_SUBSYS_LOCK() lck_mtx_lock(&sysv_shm_subsys_mutex)
#define SYSV_SHM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_shm_subsys_mutex)

static int oshmctl(void *p, void *uap, void *retval);
static int shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode, int * retval);
static int shmget_existing(struct shmget_args *uap, int mode, int segnum, int  * retval);
static void shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out);
static void shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out);

/*
 * Dispatch table for shmsys(), indexed by uap->which.
 * XXX casting to (sy_call_t *) is bogus, as usual.
 */
static sy_call_t *shmcalls[] = {
	(sy_call_t *)shmat, (sy_call_t *)oshmctl,
	(sy_call_t *)shmdt, (sy_call_t *)shmget,
	(sy_call_t *)shmctl
};

/*
 * Segment state flags.  They are kept in shm_perm.mode next to the
 * ACCESSPERMS permission bits.
 */
#define	SHMSEG_FREE     	0x0200	/* slot is unused */
#define	SHMSEG_REMOVED  	0x0400	/* IPC_RMID'ed, or still being created */
#define	SHMSEG_ALLOCATED	0x0800	/* slot holds a segment */
#define	SHMSEG_WANTED		0x1000	/* a shmget() caller is waiting on it */

/*
 * shm_last_free: index of a slot known to be free, or -1 when unknown.
 * shm_nused:     number of slots currently in use.
 * shm_committed: memory committed to segments, in btoc() units.
 */
static int shm_last_free, shm_nused, shm_committed;
struct shmid_kernel	*shmsegs;	/* 64 bit version */
static int shm_inited = 0;	/* set to 1 once shminit() has succeeded */
142
/*
 * Since anonymous memory chunks are limited to ANON_MAX_SIZE bytes,
 * we have to keep a list of chunks when we want to handle a shared memory
 * segment bigger than ANON_MAX_SIZE.
 * Each chunk points to a VM named entry of up to ANON_MAX_SIZE bytes
 * of anonymous memory.
 *
 * The head of the list is stored ("tunneled") in the segment's
 * u.shm_internal field; the list is singly linked and NULL terminated.
 */
struct shm_handle {
	void * shm_object;			/* named entry for this chunk*/
	memory_object_size_t shm_handle_size;	/* size of this chunk */
	struct shm_handle *shm_handle_next;	/* next chunk */
};
155
/*
 * One entry of a process's attach table (p->vm_shm).  The table holds
 * shminfo.shmseg entries; an entry with shmid == -1 is unused.
 */
struct shmmap_state {
	mach_vm_address_t va;		/* user address */
	int shmid;			/* segment id */
};
160
/* Forward declarations of the file-local segment helpers defined below. */
static void shm_deallocate_segment(struct shmid_kernel *);
static int shm_find_segment_by_key(key_t);
static struct shmid_kernel *shm_find_segment_by_shmid(int);
static int shm_delete_mapping(struct proc *, struct shmmap_state *, int);
165
#ifdef __APPLE_API_PRIVATE
/*
 * Default tunables; each can be read and written through the kern.sysv.*
 * sysctls declared at the end of this file.  As used in this file:
 *   shmmax/shmmin bound the byte size accepted by shmget(),
 *   shmmni is the number of slots in the shmsegs[] array,
 *   shmseg is the number of entries in each process's attach table,
 *   shmall bounds shm_committed (btoc() units).
 * NOTE(review): the initializer is positional and assumes struct shminfo
 * declares its members in the order max, min, mni, seg, all -- confirm
 * against the structure definition.
 */
#define DEFAULT_SHMMAX	(4 * 1024 * 1024)
#define DEFAULT_SHMMIN	1
#define DEFAULT_SHMMNI	32
#define DEFAULT_SHMSEG	8
#define DEFAULT_SHMALL	1024
struct  shminfo shminfo = {
        DEFAULT_SHMMAX,
        DEFAULT_SHMMIN,
        DEFAULT_SHMMNI,
	DEFAULT_SHMSEG,
	DEFAULT_SHMALL
};
#endif /* __APPLE_API_PRIVATE */

/* Defined near the end of this file; initializes the subsystem mutex. */
void sysv_shm_lock_init(void);
182
183static __inline__ time_t
184sysv_shmtime(void)
185{
186	struct timeval	tv;
187	microtime(&tv);
188	return (tv.tv_sec);
189}
190
191/*
192 * This conversion is safe, since if we are converting for a 32 bit process,
193 * then it's value of (struct shmid_ds)->shm_segsz will never exceed 4G.
194 *
195 * NOTE: Source and target may *NOT* overlap! (target is smaller)
196 */
197static void
198shmid_ds_64to32(struct user_shmid_ds *in, struct user32_shmid_ds *out)
199{
200	out->shm_perm = in->shm_perm;
201	out->shm_segsz = in->shm_segsz;
202	out->shm_lpid = in->shm_lpid;
203	out->shm_cpid = in->shm_cpid;
204	out->shm_nattch = in->shm_nattch;
205	out->shm_atime = in->shm_atime;
206	out->shm_dtime = in->shm_dtime;
207	out->shm_ctime = in->shm_ctime;
208	out->shm_internal = CAST_DOWN_EXPLICIT(int,in->shm_internal);
209}
210
/*
 * Widen a 32 bit user segment descriptor into the 64 bit representation.
 *
 * NOTE: Source and target are permitted to overlap (source is smaller);
 * this works because we copy fields in order from the end of the struct to
 * the beginning.  Do not reorder the assignments below.
 */
static void
shmid_ds_32to64(struct user32_shmid_ds *in, struct user_shmid_ds *out)
{
	out->shm_internal = in->shm_internal;
	out->shm_ctime = in->shm_ctime;
	out->shm_dtime = in->shm_dtime;
	out->shm_atime = in->shm_atime;
	out->shm_nattch = in->shm_nattch;
	out->shm_cpid = in->shm_cpid;
	out->shm_lpid = in->shm_lpid;
	out->shm_segsz = in->shm_segsz;
	out->shm_perm = in->shm_perm;
}
229
230
231static int
232shm_find_segment_by_key(key_t key)
233{
234	int i;
235
236	for (i = 0; i < shminfo.shmmni; i++)
237		if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
238		    shmsegs[i].u.shm_perm._key == key)
239			return i;
240	return -1;
241}
242
243static struct shmid_kernel *
244shm_find_segment_by_shmid(int shmid)
245{
246	int segnum;
247	struct shmid_kernel *shmseg;
248
249	segnum = IPCID_TO_IX(shmid);
250	if (segnum < 0 || segnum >= shminfo.shmmni)
251		return NULL;
252	shmseg = &shmsegs[segnum];
253	if ((shmseg->u.shm_perm.mode & (SHMSEG_ALLOCATED | SHMSEG_REMOVED))
254	    != SHMSEG_ALLOCATED ||
255	    shmseg->u.shm_perm._seq != IPCID_TO_SEQ(shmid))
256		return NULL;
257	return shmseg;
258}
259
260static void
261shm_deallocate_segment(struct shmid_kernel *shmseg)
262{
263	struct shm_handle *shm_handle, *shm_handle_next;
264	mach_vm_size_t size;
265
266	for (shm_handle = CAST_DOWN(void *,shmseg->u.shm_internal); /* tunnel */
267	     shm_handle != NULL;
268	     shm_handle = shm_handle_next) {
269		shm_handle_next = shm_handle->shm_handle_next;
270		mach_memory_entry_port_release(shm_handle->shm_object);
271		FREE((caddr_t) shm_handle, M_SHM);
272	}
273	shmseg->u.shm_internal = USER_ADDR_NULL;		/* tunnel */
274	size = mach_vm_round_page(shmseg->u.shm_segsz);
275	shm_committed -= btoc(size);
276	shm_nused--;
277	shmseg->u.shm_perm.mode = SHMSEG_FREE;
278#if CONFIG_MACF
279	/* Reset the MAC label */
280	mac_sysvshm_label_recycle(shmseg);
281#endif
282}
283
284static int
285shm_delete_mapping(__unused struct proc *p, struct shmmap_state *shmmap_s,
286	int deallocate)
287{
288	struct shmid_kernel *shmseg;
289	int segnum, result;
290	mach_vm_size_t size;
291
292	segnum = IPCID_TO_IX(shmmap_s->shmid);
293	shmseg = &shmsegs[segnum];
294	size = mach_vm_round_page(shmseg->u.shm_segsz);	/* XXX done for us? */
295	if (deallocate) {
296	result = mach_vm_deallocate(current_map(), shmmap_s->va, size);
297	if (result != KERN_SUCCESS)
298		return EINVAL;
299	}
300	shmmap_s->shmid = -1;
301	shmseg->u.shm_dtime = sysv_shmtime();
302	if ((--shmseg->u.shm_nattch <= 0) &&
303	    (shmseg->u.shm_perm.mode & SHMSEG_REMOVED)) {
304		shm_deallocate_segment(shmseg);
305		shm_last_free = segnum;
306	}
307	return 0;
308}
309
310int
311shmdt(struct proc *p, struct shmdt_args *uap, int32_t *retval)
312{
313#if CONFIG_MACF
314	struct shmid_kernel *shmsegptr;
315#endif
316	struct shmmap_state *shmmap_s;
317	int i;
318	int shmdtret = 0;
319
320	AUDIT_ARG(svipc_addr, uap->shmaddr);
321
322	SYSV_SHM_SUBSYS_LOCK();
323
324	if (!shm_inited) {
325		shminit(NULL);
326	}
327	shmmap_s = (struct shmmap_state *)p->vm_shm;
328 	if (shmmap_s == NULL) {
329		shmdtret = EINVAL;
330		goto shmdt_out;
331	}
332
333	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
334		if (shmmap_s->shmid != -1 &&
335		    shmmap_s->va == (mach_vm_offset_t)uap->shmaddr)
336			break;
337	if (i == shminfo.shmseg) {
338		shmdtret = EINVAL;
339		goto shmdt_out;
340	}
341#if CONFIG_MACF
342	/*
343	 * XXX: It might be useful to move this into the shm_delete_mapping
344	 * function
345	 */
346	shmsegptr = &shmsegs[IPCID_TO_IX(shmmap_s->shmid)];
347	shmdtret = mac_sysvshm_check_shmdt(kauth_cred_get(), shmsegptr);
348	if (shmdtret)
349		goto shmdt_out;
350#endif
351	i = shm_delete_mapping(p, shmmap_s, 1);
352
353	if (i == 0)
354		*retval = 0;
355	shmdtret = i;
356shmdt_out:
357	SYSV_SHM_SUBSYS_UNLOCK();
358	return shmdtret;
359}
360
361int
362shmat(struct proc *p, struct shmat_args *uap, user_addr_t *retval)
363{
364	int error, i, flags;
365	struct shmid_kernel	*shmseg;
366	struct shmmap_state	*shmmap_s = NULL;
367	struct shm_handle	*shm_handle;
368	mach_vm_address_t	attach_va;	/* attach address in/out */
369	mach_vm_size_t		map_size;	/* size of map entry */
370	mach_vm_size_t		mapped_size;
371	vm_prot_t		prot;
372	size_t			size;
373	kern_return_t		rv;
374	int			shmat_ret;
375	int			vm_flags;
376
377	shmat_ret = 0;
378
379	AUDIT_ARG(svipc_id, uap->shmid);
380	AUDIT_ARG(svipc_addr, uap->shmaddr);
381
382	SYSV_SHM_SUBSYS_LOCK();
383
384	if (!shm_inited) {
385		shminit(NULL);
386	}
387
388	shmmap_s = (struct shmmap_state *)p->vm_shm;
389
390	if (shmmap_s == NULL) {
391		size = shminfo.shmseg * sizeof(struct shmmap_state);
392		MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
393		if (shmmap_s == NULL) {
394			shmat_ret = ENOMEM;
395			goto shmat_out;
396		}
397		for (i = 0; i < shminfo.shmseg; i++)
398			shmmap_s[i].shmid = -1;
399		p->vm_shm = (caddr_t)shmmap_s;
400	}
401	shmseg = shm_find_segment_by_shmid(uap->shmid);
402	if (shmseg == NULL) {
403		shmat_ret = EINVAL;
404		goto shmat_out;
405	}
406
407	AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
408	error = ipcperm(kauth_cred_get(), &shmseg->u.shm_perm,
409	    (uap->shmflg & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
410	if (error) {
411		shmat_ret = error;
412		goto shmat_out;
413	}
414
415#if CONFIG_MACF
416	error = mac_sysvshm_check_shmat(kauth_cred_get(), shmseg, uap->shmflg);
417	if (error) {
418		shmat_ret = error;
419		goto shmat_out;
420	}
421#endif
422	for (i = 0; i < shminfo.shmseg; i++) {
423		if (shmmap_s->shmid == -1)
424			break;
425		shmmap_s++;
426	}
427	if (i >= shminfo.shmseg) {
428		shmat_ret = EMFILE;
429		goto shmat_out;
430	}
431
432	map_size = mach_vm_round_page(shmseg->u.shm_segsz);
433	prot = VM_PROT_READ;
434	if ((uap->shmflg & SHM_RDONLY) == 0)
435		prot |= VM_PROT_WRITE;
436	flags = MAP_ANON | MAP_SHARED;
437	if (uap->shmaddr)
438		flags |= MAP_FIXED;
439
440	attach_va = (mach_vm_address_t)uap->shmaddr;
441	if (uap->shmflg & SHM_RND)
442		attach_va &= ~(SHMLBA-1);
443	else if ((attach_va & (SHMLBA-1)) != 0) {
444		shmat_ret = EINVAL;
445		goto shmat_out;
446	}
447
448	if (flags & MAP_FIXED) {
449		vm_flags = VM_FLAGS_FIXED;
450	} else {
451		vm_flags = VM_FLAGS_ANYWHERE;
452	}
453
454	mapped_size = 0;
455
456	/* first reserve enough space... */
457	rv = mach_vm_map(current_map(),
458			 &attach_va,
459			 map_size,
460			 0,
461			 vm_flags,
462			 IPC_PORT_NULL,
463			 0,
464			 FALSE,
465			 VM_PROT_NONE,
466			 VM_PROT_NONE,
467			 VM_INHERIT_NONE);
468	if (rv != KERN_SUCCESS) {
469		goto out;
470	}
471
472	shmmap_s->va = attach_va;
473
474	/* ... then map the shared memory over the reserved space */
475	for (shm_handle = CAST_DOWN(void *, shmseg->u.shm_internal);/* tunnel */
476	     shm_handle != NULL;
477	     shm_handle = shm_handle->shm_handle_next) {
478
479		rv = vm_map_enter_mem_object(
480			current_map(),		/* process map */
481			&attach_va,		/* attach address */
482			shm_handle->shm_handle_size, /* segment size */
483			(mach_vm_offset_t)0,	/* alignment mask */
484			VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
485			shm_handle->shm_object,
486			(mach_vm_offset_t)0,
487			FALSE,
488			prot,
489			prot,
490			VM_INHERIT_SHARE);
491		if (rv != KERN_SUCCESS)
492			goto out;
493
494		mapped_size += shm_handle->shm_handle_size;
495		attach_va = attach_va + shm_handle->shm_handle_size;
496	}
497
498	shmmap_s->shmid = uap->shmid;
499	shmseg->u.shm_lpid = p->p_pid;
500	shmseg->u.shm_atime = sysv_shmtime();
501	shmseg->u.shm_nattch++;
502	*retval = shmmap_s->va;	/* XXX return -1 on error */
503	shmat_ret = 0;
504	goto shmat_out;
505out:
506	if (mapped_size > 0) {
507		(void) mach_vm_deallocate(current_map(),
508					  shmmap_s->va,
509					  mapped_size);
510	}
511	switch (rv) {
512	case KERN_INVALID_ADDRESS:
513	case KERN_NO_SPACE:
514		shmat_ret = ENOMEM;
515		break;
516	case KERN_PROTECTION_FAILURE:
517		shmat_ret = EACCES;
518		break;
519	default:
520		shmat_ret = EINVAL;
521		break;
522	}
523shmat_out:
524	SYSV_SHM_SUBSYS_UNLOCK();
525	return shmat_ret;
526}
527
/*
 * Placeholder for the old-style shmctl slot of the shmsys() dispatch table;
 * it ignores its arguments and always fails with EINVAL.
 */
static int
oshmctl(__unused void *p, __unused void *uap, __unused void *retval)
{
	const int unsupported = EINVAL;

	return unsupported;
}
533
534/*
535 * Returns:	0			Success
536 *		EINVAL
537 *	copyout:EFAULT
538 *	copyin:EFAULT
539 *	ipcperm:EPERM
540 *	ipcperm:EACCES
541 */
542int
543shmctl(__unused struct proc *p, struct shmctl_args *uap, int32_t *retval)
544{
545	int error;
546	kauth_cred_t cred = kauth_cred_get();
547	struct user_shmid_ds inbuf;
548	struct shmid_kernel *shmseg;
549
550	int shmctl_ret = 0;
551
552	AUDIT_ARG(svipc_cmd, uap->cmd);
553	AUDIT_ARG(svipc_id, uap->shmid);
554
555	SYSV_SHM_SUBSYS_LOCK();
556
557	if (!shm_inited) {
558		shminit(NULL);
559	}
560
561	shmseg = shm_find_segment_by_shmid(uap->shmid);
562	if (shmseg == NULL) {
563		shmctl_ret = EINVAL;
564		goto shmctl_out;
565	}
566
567	/* XXAUDIT: This is the perms BEFORE any change by this call. This
568	 * may not be what is desired.
569	 */
570	AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
571
572#if CONFIG_MACF
573	error = mac_sysvshm_check_shmctl(cred, shmseg, uap->cmd);
574	if (error) {
575		shmctl_ret = error;
576		goto shmctl_out;
577	}
578#endif
579	switch (uap->cmd) {
580	case IPC_STAT:
581		error = ipcperm(cred, &shmseg->u.shm_perm, IPC_R);
582		if (error) {
583			shmctl_ret = error;
584			goto shmctl_out;
585		}
586
587		if (IS_64BIT_PROCESS(p)) {
588			error = copyout((caddr_t)&shmseg->u, uap->buf, sizeof(struct user_shmid_ds));
589		} else {
590			struct user32_shmid_ds shmid_ds32;
591			shmid_ds_64to32(&shmseg->u, &shmid_ds32);
592			error = copyout(&shmid_ds32, uap->buf, sizeof(shmid_ds32));
593		}
594		if (error) {
595			shmctl_ret = error;
596			goto shmctl_out;
597		}
598		break;
599	case IPC_SET:
600		error = ipcperm(cred, &shmseg->u.shm_perm, IPC_M);
601		if (error) {
602			shmctl_ret = error;
603			goto shmctl_out;
604		}
605		if (IS_64BIT_PROCESS(p)) {
606			error = copyin(uap->buf, &inbuf, sizeof(struct user_shmid_ds));
607		} else {
608			struct user32_shmid_ds shmid_ds32;
609			error = copyin(uap->buf, &shmid_ds32, sizeof(shmid_ds32));
610			/* convert in place; ugly, but safe */
611			shmid_ds_32to64(&shmid_ds32, &inbuf);
612		}
613		if (error) {
614			shmctl_ret = error;
615			goto shmctl_out;
616		}
617		shmseg->u.shm_perm.uid = inbuf.shm_perm.uid;
618		shmseg->u.shm_perm.gid = inbuf.shm_perm.gid;
619		shmseg->u.shm_perm.mode =
620		    (shmseg->u.shm_perm.mode & ~ACCESSPERMS) |
621		    (inbuf.shm_perm.mode & ACCESSPERMS);
622		shmseg->u.shm_ctime = sysv_shmtime();
623		break;
624	case IPC_RMID:
625		error = ipcperm(cred, &shmseg->u.shm_perm, IPC_M);
626		if (error) {
627			shmctl_ret = error;
628			goto shmctl_out;
629		}
630		shmseg->u.shm_perm._key = IPC_PRIVATE;
631		shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
632		if (shmseg->u.shm_nattch <= 0) {
633			shm_deallocate_segment(shmseg);
634			shm_last_free = IPCID_TO_IX(uap->shmid);
635		}
636		break;
637#if 0
638	case SHM_LOCK:
639	case SHM_UNLOCK:
640#endif
641	default:
642		shmctl_ret = EINVAL;
643		goto shmctl_out;
644	}
645	*retval = 0;
646	shmctl_ret = 0;
647shmctl_out:
648	SYSV_SHM_SUBSYS_UNLOCK();
649	return shmctl_ret;
650}
651
652static int
653shmget_existing(struct shmget_args *uap, int mode, int segnum, int *retval)
654{
655	struct shmid_kernel *shmseg;
656	int error = 0;
657
658	shmseg = &shmsegs[segnum];
659	if (shmseg->u.shm_perm.mode & SHMSEG_REMOVED) {
660		/*
661		 * This segment is in the process of being allocated.  Wait
662		 * until it's done, and look the key up again (in case the
663		 * allocation failed or it was freed).
664		 */
665		shmseg->u.shm_perm.mode |= SHMSEG_WANTED;
666		error = tsleep((caddr_t)shmseg, PLOCK | PCATCH, "shmget", 0);
667		if (error)
668			return error;
669		return EAGAIN;
670	}
671
672	/*
673	 * The low 9 bits of shmflag are the mode bits being requested, which
674	 * are the actual mode bits desired on the segment, and not in IPC_R
675	 * form; therefore it would be incorrect to call ipcperm() to validate
676	 * them; instead, we AND the existing mode with the requested mode, and
677	 * verify that it matches the requested mode; otherwise, we fail with
678	 * EACCES (access denied).
679	 */
680	if ((shmseg->u.shm_perm.mode & mode) != mode)
681		return EACCES;
682
683#if CONFIG_MACF
684	error = mac_sysvshm_check_shmget(kauth_cred_get(), shmseg, uap->shmflg);
685	if (error)
686		return (error);
687#endif
688
689	if (uap->size && uap->size > shmseg->u.shm_segsz)
690		return EINVAL;
691
692       if ((uap->shmflg & (IPC_CREAT | IPC_EXCL)) == (IPC_CREAT | IPC_EXCL))
693		return EEXIST;
694
695	*retval = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
696	return 0;
697}
698
699static int
700shmget_allocate_segment(struct proc *p, struct shmget_args *uap, int mode,
701	int *retval)
702{
703	int i, segnum, shmid;
704	kauth_cred_t cred = kauth_cred_get();
705	struct shmid_kernel *shmseg;
706	struct shm_handle *shm_handle;
707	kern_return_t kret;
708	mach_vm_size_t total_size, size, alloc_size;
709	void * mem_object;
710	struct shm_handle *shm_handle_next, **shm_handle_next_p;
711
712	if (uap->size < (user_size_t)shminfo.shmmin ||
713	    uap->size > (user_size_t)shminfo.shmmax)
714		return EINVAL;
715	if (shm_nused >= shminfo.shmmni) /* any shmids left? */
716		return ENOSPC;
717	total_size = mach_vm_round_page(uap->size);
718	if ((user_ssize_t)(shm_committed + btoc(total_size)) > shminfo.shmall)
719		return ENOMEM;
720	if (shm_last_free < 0) {
721		for (i = 0; i < shminfo.shmmni; i++)
722			if (shmsegs[i].u.shm_perm.mode & SHMSEG_FREE)
723				break;
724		if (i == shminfo.shmmni)
725			panic("shmseg free count inconsistent");
726		segnum = i;
727	} else  {
728		segnum = shm_last_free;
729		shm_last_free = -1;
730	}
731	shmseg = &shmsegs[segnum];
732
733	/*
734	 * In case we sleep in malloc(), mark the segment present but deleted
735	 * so that noone else tries to create the same key.
736	 * XXX but we don't release the global lock !?
737	 */
738	shmseg->u.shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
739	shmseg->u.shm_perm._key = uap->key;
740	shmseg->u.shm_perm._seq = (shmseg->u.shm_perm._seq + 1) & 0x7fff;
741
742	shm_handle_next_p = NULL;
743	for (alloc_size = 0;
744	     alloc_size < total_size;
745	     alloc_size += size) {
746		size = MIN(total_size - alloc_size, ANON_MAX_SIZE);
747		kret = mach_make_memory_entry_64(
748			VM_MAP_NULL,
749			(memory_object_size_t *) &size,
750			(memory_object_offset_t) 0,
751			MAP_MEM_NAMED_CREATE | VM_PROT_DEFAULT,
752			(ipc_port_t *) &mem_object, 0);
753		if (kret != KERN_SUCCESS)
754			goto out;
755
756		MALLOC(shm_handle, struct shm_handle *, sizeof(struct shm_handle), M_SHM, M_WAITOK);
757		if (shm_handle == NULL) {
758			kret = KERN_NO_SPACE;
759			mach_memory_entry_port_release(mem_object);
760			mem_object = NULL;
761			goto out;
762		}
763		shm_handle->shm_object = mem_object;
764		shm_handle->shm_handle_size = size;
765		shm_handle->shm_handle_next = NULL;
766		if (shm_handle_next_p == NULL) {
767			shmseg->u.shm_internal = CAST_USER_ADDR_T(shm_handle);/* tunnel */
768		} else {
769			*shm_handle_next_p = shm_handle;
770		}
771		shm_handle_next_p = &shm_handle->shm_handle_next;
772	}
773
774	shmid = IXSEQ_TO_IPCID(segnum, shmseg->u.shm_perm);
775
776	shmseg->u.shm_perm.cuid = shmseg->u.shm_perm.uid = kauth_cred_getuid(cred);
777	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = kauth_cred_getgid(cred);
778	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
779	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
780	shmseg->u.shm_segsz = uap->size;
781	shmseg->u.shm_cpid = p->p_pid;
782	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
783	shmseg->u.shm_atime = shmseg->u.shm_dtime = 0;
784#if CONFIG_MACF
785	mac_sysvshm_label_associate(cred, shmseg);
786#endif
787	shmseg->u.shm_ctime = sysv_shmtime();
788	shm_committed += btoc(size);
789	shm_nused++;
790	AUDIT_ARG(svipc_perm, &shmseg->u.shm_perm);
791	if (shmseg->u.shm_perm.mode & SHMSEG_WANTED) {
792		/*
793		 * Somebody else wanted this key while we were asleep.  Wake
794		 * them up now.
795		 */
796		shmseg->u.shm_perm.mode &= ~SHMSEG_WANTED;
797		wakeup((caddr_t)shmseg);
798	}
799	*retval = shmid;
800	AUDIT_ARG(svipc_id, shmid);
801	return 0;
802out:
803	if (kret != KERN_SUCCESS) {
804		for (shm_handle = CAST_DOWN(void *,shmseg->u.shm_internal); /* tunnel */
805		     shm_handle != NULL;
806		     shm_handle = shm_handle_next) {
807			shm_handle_next = shm_handle->shm_handle_next;
808			mach_memory_entry_port_release(shm_handle->shm_object);
809			FREE((caddr_t) shm_handle, M_SHM);
810		}
811		shmseg->u.shm_internal = USER_ADDR_NULL; /* tunnel */
812	}
813
814	switch (kret) {
815	case KERN_INVALID_ADDRESS:
816	case KERN_NO_SPACE:
817		return (ENOMEM);
818	case KERN_PROTECTION_FAILURE:
819		return (EACCES);
820	default:
821		return (EINVAL);
822	}
823
824}
825
826int
827shmget(struct proc *p, struct shmget_args *uap, int32_t *retval)
828{
829	int segnum, mode, error;
830	int shmget_ret = 0;
831
832	/* Auditing is actually done in shmget_allocate_segment() */
833
834	SYSV_SHM_SUBSYS_LOCK();
835
836	if (!shm_inited) {
837		shminit(NULL);
838	}
839
840	mode = uap->shmflg & ACCESSPERMS;
841	if (uap->key != IPC_PRIVATE) {
842	again:
843		segnum = shm_find_segment_by_key(uap->key);
844		if (segnum >= 0) {
845			error = shmget_existing(uap, mode, segnum, retval);
846			if (error == EAGAIN)
847				goto again;
848			shmget_ret = error;
849			goto shmget_out;
850		}
851		if ((uap->shmflg & IPC_CREAT) == 0) {
852			shmget_ret = ENOENT;
853			goto shmget_out;
854		}
855	}
856	shmget_ret = shmget_allocate_segment(p, uap, mode, retval);
857shmget_out:
858	SYSV_SHM_SUBSYS_UNLOCK();
859	return shmget_ret;
860	/*NOTREACHED*/
861
862}
863
864/*
865 * shmsys
866 *
867 * Entry point for all SHM calls: shmat, oshmctl, shmdt, shmget, shmctl
868 *
869 * Parameters:	p	Process requesting the call
870 * 		uap	User argument descriptor (see below)
871 * 		retval	Return value of the selected shm call
872 *
873 * Indirect parameters:	uap->which	msg call to invoke (index in array of shm calls)
874 * 			uap->a2		User argument descriptor
875 *
876 * Returns:	0	Success
877 * 		!0	Not success
878 *
879 * Implicit returns: retval     Return value of the selected shm call
880 *
881 * DEPRECATED:  This interface should not be used to call the other SHM
882 * 		functions (shmat, oshmctl, shmdt, shmget, shmctl). The correct
883 * 		usage is to call the other SHM functions directly.
884 */
885int
886shmsys(struct proc *p, struct shmsys_args *uap, int32_t *retval)
887{
888
889	/* The routine that we are dispatching already does this */
890
891	if (uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))
892		return EINVAL;
893	return ((*shmcalls[uap->which])(p, &uap->a2, retval));
894}
895
896/*
897 * Return 0 on success, 1 on failure.
898 */
899int
900shmfork(struct proc *p1, struct proc *p2)
901{
902	struct shmmap_state *shmmap_s;
903	size_t size;
904	int i;
905	int shmfork_ret = 0;
906
907	SYSV_SHM_SUBSYS_LOCK();
908
909	if (!shm_inited) {
910		shminit(NULL);
911	}
912
913	size = shminfo.shmseg * sizeof(struct shmmap_state);
914	MALLOC(shmmap_s, struct shmmap_state *, size, M_SHM, M_WAITOK);
915	if (shmmap_s != NULL) {
916		bcopy((caddr_t)p1->vm_shm, (caddr_t)shmmap_s, size);
917		p2->vm_shm = (caddr_t)shmmap_s;
918		for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
919			if (shmmap_s->shmid != -1)
920				shmsegs[IPCID_TO_IX(shmmap_s->shmid)].u.shm_nattch++;
921		shmfork_ret = 0;
922		goto shmfork_out;
923	}
924
925	shmfork_ret = 1;	/* failed to copy to child - ENOMEM */
926shmfork_out:
927	SYSV_SHM_SUBSYS_UNLOCK();
928	return shmfork_ret;
929}
930
931void
932shmexit(struct proc *p)
933{
934	struct shmmap_state *shmmap_s;
935	int i;
936
937	shmmap_s = (struct shmmap_state *)p->vm_shm;
938
939	SYSV_SHM_SUBSYS_LOCK();
940	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
941		if (shmmap_s->shmid != -1)
942			/*
943			 * XXX: Should the MAC framework enforce
944			 * check here as well.
945			 */
946			shm_delete_mapping(p, shmmap_s, 1);
947	FREE((caddr_t)p->vm_shm, M_SHM);
948	p->vm_shm = NULL;
949	SYSV_SHM_SUBSYS_UNLOCK();
950}
951
952/*
953 * shmexec() is like shmexit(), only it doesn't delete the mappings,
954 * since the old address space has already been destroyed and the new
955 * one instantiated.  Instead, it just does the housekeeping work we
956 * need to do to keep the System V shared memory subsystem sane.
957 */
958__private_extern__ void
959shmexec(struct proc *p)
960{
961	struct shmmap_state *shmmap_s;
962	int i;
963
964	shmmap_s = (struct shmmap_state *)p->vm_shm;
965	SYSV_SHM_SUBSYS_LOCK();
966	for (i = 0; i < shminfo.shmseg; i++, shmmap_s++)
967		if (shmmap_s->shmid != -1)
968			shm_delete_mapping(p, shmmap_s, 0);
969	FREE((caddr_t)p->vm_shm, M_SHM);
970	p->vm_shm = NULL;
971	SYSV_SHM_SUBSYS_UNLOCK();
972}
973
974void
975shminit(__unused void *dummy)
976{
977	int i;
978	int s;
979
980	if (!shm_inited) {
981		/*
982		 * we store internally 64 bit, since if we didn't, we would
983		 * be unable to represent a segment size in excess of 32 bits
984		 * with the (struct shmid_ds)->shm_segsz field; also, POSIX
985		 * dictates this filed be a size_t, which is 64 bits when
986		 * running 64 bit binaries.
987		 */
988		s = sizeof(struct shmid_kernel) * shminfo.shmmni;
989
990		MALLOC(shmsegs, struct shmid_kernel *, s, M_SHM, M_WAITOK);
991		if (shmsegs == NULL) {
992			/* XXX fail safely: leave shared memory uninited */
993			return;
994		}
995		for (i = 0; i < shminfo.shmmni; i++) {
996			shmsegs[i].u.shm_perm.mode = SHMSEG_FREE;
997			shmsegs[i].u.shm_perm._seq = 0;
998#if CONFIG_MACF
999			mac_sysvshm_label_init(&shmsegs[i]);
1000#endif
1001		}
1002		shm_last_free = 0;
1003		shm_nused = 0;
1004		shm_committed = 0;
1005		shm_inited = 1;
1006	}
1007}
1008/* Initialize the mutex governing access to the SysV shm subsystem */
1009__private_extern__ void
1010sysv_shm_lock_init( void )
1011{
1012
1013	sysv_shm_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
1014
1015	sysv_shm_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_shm_subsys_lck_grp_attr);
1016
1017	sysv_shm_subsys_lck_attr = lck_attr_alloc_init();
1018	lck_mtx_init(&sysv_shm_subsys_mutex, sysv_shm_subsys_lck_grp, sysv_shm_subsys_lck_attr);
1019}
1020
1021/* (struct sysctl_oid *oidp, void *arg1, int arg2, \
1022        struct sysctl_req *req) */
1023static int
1024sysctl_shminfo(__unused struct sysctl_oid *oidp, void *arg1,
1025	__unused int arg2, struct sysctl_req *req)
1026{
1027	int error = 0;
1028	int sysctl_shminfo_ret = 0;
1029	uint64_t	saved_shmmax;
1030
1031	error = SYSCTL_OUT(req, arg1, sizeof(int64_t));
1032	if (error || req->newptr == USER_ADDR_NULL)
1033		return(error);
1034
1035	SYSV_SHM_SUBSYS_LOCK();
1036
1037	/* shmmni can not be changed after SysV SHM has been initialized */
1038	if (shm_inited && arg1 == &shminfo.shmmni) {
1039		sysctl_shminfo_ret = EPERM;
1040		goto sysctl_shminfo_out;
1041	}
1042	saved_shmmax = shminfo.shmmax;
1043
1044	if ((error = SYSCTL_IN(req, arg1, sizeof(int64_t))) != 0) {
1045		sysctl_shminfo_ret = error;
1046		goto sysctl_shminfo_out;
1047	}
1048
1049	if (arg1 == &shminfo.shmmax) {
1050		/* shmmax needs to be page-aligned */
1051		if (shminfo.shmmax & PAGE_MASK_64) {
1052			shminfo.shmmax = saved_shmmax;
1053			sysctl_shminfo_ret = EINVAL;
1054			goto sysctl_shminfo_out;
1055		}
1056	}
1057	sysctl_shminfo_ret = 0;
1058sysctl_shminfo_out:
1059	SYSV_SHM_SUBSYS_UNLOCK();
1060	return sysctl_shminfo_ret;
1061}
1062
1063static int
1064IPCS_shm_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
1065	__unused int arg2, struct sysctl_req *req)
1066{
1067	int error;
1068	int cursor;
1069	union {
1070		struct user32_IPCS_command u32;
1071		struct user_IPCS_command u64;
1072	} ipcs;
1073	struct user32_shmid_ds shmid_ds32;	/* post conversion, 32 bit version */
1074	void *shmid_dsp;
1075	size_t ipcs_sz = sizeof(struct user_IPCS_command);
1076	size_t shmid_ds_sz = sizeof(struct user_shmid_ds);
1077	struct proc *p = current_proc();
1078
1079	SYSV_SHM_SUBSYS_LOCK();
1080
1081	if (!shm_inited) {
1082		shminit(NULL);
1083	}
1084
1085	if (!IS_64BIT_PROCESS(p)) {
1086		ipcs_sz = sizeof(struct user32_IPCS_command);
1087		shmid_ds_sz = sizeof(struct user32_shmid_ds);
1088	}
1089
1090	/* Copy in the command structure */
1091	if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
1092		goto ipcs_shm_sysctl_out;
1093	}
1094
1095	if (!IS_64BIT_PROCESS(p))	/* convert in place */
1096		ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data);
1097
1098	/* Let us version this interface... */
1099	if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
1100		error = EINVAL;
1101		goto ipcs_shm_sysctl_out;
1102	}
1103
1104	switch(ipcs.u64.ipcs_op) {
1105	case IPCS_SHM_CONF:	/* Obtain global configuration data */
1106		if (ipcs.u64.ipcs_datalen != sizeof(struct shminfo)) {
1107			if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. */
1108				error = ENOMEM;
1109				break;
1110			}
1111			error = ERANGE;
1112			break;
1113		}
1114		error = copyout(&shminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1115		break;
1116
1117	case IPCS_SHM_ITER:	/* Iterate over existing segments */
1118		cursor = ipcs.u64.ipcs_cursor;
1119		if (cursor < 0 || cursor >= shminfo.shmmni) {
1120			error = ERANGE;
1121			break;
1122		}
1123		if (ipcs.u64.ipcs_datalen != (int)shmid_ds_sz) {
1124			error = EINVAL;
1125			break;
1126		}
1127		for( ; cursor < shminfo.shmmni; cursor++) {
1128			if (shmsegs[cursor].u.shm_perm.mode & SHMSEG_ALLOCATED)
1129				break;
1130			continue;
1131		}
1132		if (cursor == shminfo.shmmni) {
1133			error = ENOENT;
1134			break;
1135		}
1136
1137		shmid_dsp = &shmsegs[cursor];	/* default: 64 bit */
1138
1139		/*
1140		 * If necessary, convert the 64 bit kernel segment
1141		 * descriptor to a 32 bit user one.
1142		 */
1143		if (!IS_64BIT_PROCESS(p)) {
1144			shmid_ds_64to32(shmid_dsp, &shmid_ds32);
1145			shmid_dsp = &shmid_ds32;
1146		}
1147		error = copyout(shmid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1148		if (!error) {
1149			/* update cursor */
1150			ipcs.u64.ipcs_cursor = cursor + 1;
1151
1152		if (!IS_64BIT_PROCESS(p))	/* convert in place */
1153			ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t,ipcs.u64.ipcs_data);
1154
1155		error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
1156		}
1157		break;
1158
1159	default:
1160		error = EINVAL;
1161		break;
1162	}
1163ipcs_shm_sysctl_out:
1164	SYSV_SHM_SUBSYS_UNLOCK();
1165	return(error);
1166}
1167
/* kern.sysv: parent node for the System V IPC tunables. */
SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSV");

/*
 * kern.sysv.shm{max,min,mni,seg,all}: 64 bit ("Q") views of the shminfo
 * members, all served by sysctl_shminfo(), which receives the address of
 * the member as arg1.
 */
SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmax, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    &shminfo.shmmax, 0, &sysctl_shminfo ,"Q","shmmax");

SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmin, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    &shminfo.shmmin, 0, &sysctl_shminfo ,"Q","shmmin");

SYSCTL_PROC(_kern_sysv, OID_AUTO, shmmni, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    &shminfo.shmmni, 0, &sysctl_shminfo ,"Q","shmmni");

SYSCTL_PROC(_kern_sysv, OID_AUTO, shmseg, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    &shminfo.shmseg, 0, &sysctl_shminfo ,"Q","shmseg");

SYSCTL_PROC(_kern_sysv, OID_AUTO, shmall, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
    &shminfo.shmall, 0, &sysctl_shminfo ,"Q","shmall");

/* kern.sysv.ipcs: parent node for the ipcs command interfaces. */
SYSCTL_NODE(_kern_sysv, OID_AUTO, ipcs, CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY, 0, "SYSVIPCS");

/* kern.sysv.ipcs.shm: shared memory ipcs interface, see IPCS_shm_sysctl(). */
SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, shm, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
	0, 0, IPCS_shm_sysctl,
	"S,IPCS_shm_command",
	"ipcs shm command interface");
1191#endif /* SYSV_SHM */
1192
1193/* DSEP Review Done pl-20051108-v02 @2743,@2908,@2913,@3009 */
1194