uipc_shm.c revision 225344
150974Swpaul/*-
250974Swpaul * Copyright (c) 2006, 2011 Robert N. M. Watson
350974Swpaul * All rights reserved.
450974Swpaul *
550974Swpaul * Redistribution and use in source and binary forms, with or without
650974Swpaul * modification, are permitted provided that the following conditions
750974Swpaul * are met:
850974Swpaul * 1. Redistributions of source code must retain the above copyright
950974Swpaul *    notice, this list of conditions and the following disclaimer.
1050974Swpaul * 2. Redistributions in binary form must reproduce the above copyright
1150974Swpaul *    notice, this list of conditions and the following disclaimer in the
1250974Swpaul *    documentation and/or other materials provided with the distribution.
1350974Swpaul *
1450974Swpaul * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1550974Swpaul * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1650974Swpaul * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1750974Swpaul * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1850974Swpaul * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1950974Swpaul * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2050974Swpaul * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2150974Swpaul * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2250974Swpaul * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2350974Swpaul * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2450974Swpaul * SUCH DAMAGE.
2550974Swpaul */
2650974Swpaul
2750974Swpaul/*
2850974Swpaul * Support for shared swap-backed anonymous memory objects via
2950974Swpaul * shm_open(2) and shm_unlink(2).  While most of the implementation is
3050974Swpaul * here, vm_mmap.c contains mapping logic changes.
3150974Swpaul *
3250974Swpaul * TODO:
3350974Swpaul *
3450974Swpaul * (1) Need to export data to a userland tool via a sysctl.  Should ipcs(1)
3550974Swpaul *     and ipcrm(1) be expanded or should new tools to manage both POSIX
3650974Swpaul *     kernel semaphores and POSIX shared memory be written?
3750974Swpaul *
3850974Swpaul * (2) Add support for this file type to fstat(1).
3964963Swpaul *
4064963Swpaul * (3) Resource limits?  Does this need its own resource limits or are the
4164963Swpaul *     existing limits in mmap(2) sufficient?
4250974Swpaul *
4350974Swpaul * (4) Partial page truncation.  vnode_pager_setsize() will zero any parts
4450974Swpaul *     of a partially mapped page as a result of ftruncate(2)/truncate(2).
4550974Swpaul *     We can do the same (with the same pmap evil), but do we need to
4650974Swpaul *     worry about the bits on disk if the page is swapped out or will the
4750974Swpaul *     swapper zero the parts of a page that are invalid if the page is
4850974Swpaul *     swapped back in for us?
4950974Swpaul */
5050974Swpaul
5150974Swpaul#include <sys/cdefs.h>
5250974Swpaul__FBSDID("$FreeBSD: head/sys/kern/uipc_shm.c 225344 2011-09-02 17:40:39Z rwatson $");
5350974Swpaul
5450974Swpaul#include "opt_capsicum.h"
5550974Swpaul
5650974Swpaul#include <sys/param.h>
5750974Swpaul#include <sys/capability.h>
5850974Swpaul#include <sys/fcntl.h>
5950974Swpaul#include <sys/file.h>
6050974Swpaul#include <sys/filedesc.h>
6150974Swpaul#include <sys/fnv_hash.h>
6250974Swpaul#include <sys/kernel.h>
6350974Swpaul#include <sys/lock.h>
6450974Swpaul#include <sys/malloc.h>
6550974Swpaul#include <sys/mman.h>
6650974Swpaul#include <sys/mutex.h>
6787059Sluigi#include <sys/priv.h>
6850974Swpaul#include <sys/proc.h>
6950974Swpaul#include <sys/refcount.h>
7050974Swpaul#include <sys/resourcevar.h>
7150974Swpaul#include <sys/stat.h>
7250974Swpaul#include <sys/sysctl.h>
7350974Swpaul#include <sys/sysproto.h>
7487390Sjhay#include <sys/systm.h>
7587390Sjhay#include <sys/sx.h>
7650974Swpaul#include <sys/time.h>
7750974Swpaul#include <sys/vnode.h>
7850974Swpaul
7950974Swpaul#include <security/mac/mac_framework.h>
8050974Swpaul
8150974Swpaul#include <vm/vm.h>
8250974Swpaul#include <vm/vm_param.h>
8350974Swpaul#include <vm/pmap.h>
8450974Swpaul#include <vm/vm_map.h>
8550974Swpaul#include <vm/vm_object.h>
8650974Swpaul#include <vm/vm_page.h>
8750974Swpaul#include <vm/vm_pager.h>
8850974Swpaul#include <vm/swap_pager.h>
8950974Swpaul
9050974Swpaulstruct shm_mapping {
9150974Swpaul	char		*sm_path;
9250974Swpaul	Fnv32_t		sm_fnv;
9350974Swpaul	struct shmfd	*sm_shmfd;
9450974Swpaul	LIST_ENTRY(shm_mapping) sm_link;
9550974Swpaul};
9659758Speter
9759758Speterstatic MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor");
9851089Speterstatic LIST_HEAD(, shm_mapping) *shm_dictionary;
9950974Swpaulstatic struct sx shm_dict_lock;
10050974Swpaulstatic struct mtx shm_timestamp_lock;
10150974Swpaulstatic u_long shm_hash;
10250974Swpaul
10350974Swpaul#define	SHM_HASH(fnv)	(&shm_dictionary[(fnv) & shm_hash])
10450974Swpaul
10550974Swpaulstatic int	shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags);
10650974Swpaulstatic struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode);
10750974Swpaulstatic void	shm_dict_init(void *arg);
10850974Swpaulstatic void	shm_drop(struct shmfd *shmfd);
10950974Swpaulstatic struct shmfd *shm_hold(struct shmfd *shmfd);
11050974Swpaulstatic void	shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
11150974Swpaulstatic struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
11262672Swpaulstatic int	shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
11350974Swpaulstatic int	shm_dotruncate(struct shmfd *shmfd, off_t length);
11450974Swpaul
11550974Swpaulstatic fo_rdwr_t	shm_read;
11650974Swpaulstatic fo_rdwr_t	shm_write;
11750974Swpaulstatic fo_truncate_t	shm_truncate;
11850974Swpaulstatic fo_ioctl_t	shm_ioctl;
11950974Swpaulstatic fo_poll_t	shm_poll;
12050974Swpaulstatic fo_kqfilter_t	shm_kqfilter;
12150974Swpaulstatic fo_stat_t	shm_stat;
12250974Swpaulstatic fo_close_t	shm_close;
12350974Swpaulstatic fo_chmod_t	shm_chmod;
12450974Swpaulstatic fo_chown_t	shm_chown;
12550974Swpaul
12650974Swpaul/* File descriptor operations. */
12750974Swpaulstatic struct fileops shm_ops = {
12850974Swpaul	.fo_read = shm_read,
12950974Swpaul	.fo_write = shm_write,
13050974Swpaul	.fo_truncate = shm_truncate,
13150974Swpaul	.fo_ioctl = shm_ioctl,
13250974Swpaul	.fo_poll = shm_poll,
13350974Swpaul	.fo_kqfilter = shm_kqfilter,
13450974Swpaul	.fo_stat = shm_stat,
13550974Swpaul	.fo_close = shm_close,
13650974Swpaul	.fo_chmod = shm_chmod,
13750974Swpaul	.fo_chown = shm_chown,
13850974Swpaul	.fo_flags = DFLAG_PASSABLE
13962672Swpaul};
14050974Swpaul
14150974SwpaulFEATURE(posix_shm, "POSIX shared memory");
14250974Swpaul
14350974Swpaulstatic int
14450974Swpaulshm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
14550974Swpaul    int flags, struct thread *td)
14672197Swpaul{
14772197Swpaul
14872197Swpaul	return (EOPNOTSUPP);
14972197Swpaul}
15072197Swpaul
15172197Swpaulstatic int
15250974Swpaulshm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
15350974Swpaul    int flags, struct thread *td)
15450974Swpaul{
15550974Swpaul
15662672Swpaul	return (EOPNOTSUPP);
15762672Swpaul}
15862672Swpaul
15950974Swpaulstatic int
16050974Swpaulshm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
16150974Swpaul    struct thread *td)
16250974Swpaul{
16381713Swpaul	struct shmfd *shmfd;
16481713Swpaul#ifdef MAC
16581713Swpaul	int error;
16681713Swpaul#endif
16781713Swpaul
16881713Swpaul	shmfd = fp->f_data;
16950974Swpaul#ifdef MAC
17050974Swpaul	error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd);
17150974Swpaul	if (error)
17250974Swpaul		return (error);
17351030Swpaul#endif
17451030Swpaul	return (shm_dotruncate(shmfd, length));
17550974Swpaul}
17650974Swpaul
17750974Swpaulstatic int
17850974Swpaulshm_ioctl(struct file *fp, u_long com, void *data,
17950974Swpaul    struct ucred *active_cred, struct thread *td)
18050974Swpaul{
18150974Swpaul
18250974Swpaul	return (EOPNOTSUPP);
18350974Swpaul}
18450974Swpaul
18550974Swpaulstatic int
18650974Swpaulshm_poll(struct file *fp, int events, struct ucred *active_cred,
18750974Swpaul    struct thread *td)
18850974Swpaul{
18950974Swpaul
19050974Swpaul	return (EOPNOTSUPP);
19150974Swpaul}
19250974Swpaul
19350974Swpaulstatic int
19450974Swpaulshm_kqfilter(struct file *fp, struct knote *kn)
19550974Swpaul{
19650974Swpaul
19751455Swpaul	return (EOPNOTSUPP);
19850974Swpaul}
19950974Swpaul
20050974Swpaulstatic int
20150974Swpaulshm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
20250974Swpaul    struct thread *td)
20350974Swpaul{
20487059Sluigi	struct shmfd *shmfd;
20587059Sluigi#ifdef MAC
20687059Sluigi	int error;
20787059Sluigi#endif
20887059Sluigi
20987059Sluigi	shmfd = fp->f_data;
21051533Swpaul
21151473Swpaul#ifdef MAC
21250974Swpaul	error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd);
21350974Swpaul	if (error)
21450974Swpaul		return (error);
21550974Swpaul#endif
21650974Swpaul
21750974Swpaul	/*
21850974Swpaul	 * Attempt to return sanish values for fstat() on a memory file
21950974Swpaul	 * descriptor.
22050974Swpaul	 */
22150974Swpaul	bzero(sb, sizeof(*sb));
22250974Swpaul	sb->st_blksize = PAGE_SIZE;
22350974Swpaul	sb->st_size = shmfd->shm_size;
22450974Swpaul	sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
22550974Swpaul	mtx_lock(&shm_timestamp_lock);
22650974Swpaul	sb->st_atim = shmfd->shm_atime;
22781713Swpaul	sb->st_ctim = shmfd->shm_ctime;
22881713Swpaul	sb->st_mtim = shmfd->shm_mtime;
22981713Swpaul	sb->st_birthtim = shmfd->shm_birthtime;
23081713Swpaul	sb->st_mode = S_IFREG | shmfd->shm_mode;		/* XXX */
23181713Swpaul	sb->st_uid = shmfd->shm_uid;
23281713Swpaul	sb->st_gid = shmfd->shm_gid;
23381713Swpaul	mtx_unlock(&shm_timestamp_lock);
23481713Swpaul
23581713Swpaul	return (0);
23681713Swpaul}
23781713Swpaul
23881713Swpaulstatic int
23981713Swpaulshm_close(struct file *fp, struct thread *td)
24081713Swpaul{
24181713Swpaul	struct shmfd *shmfd;
24281713Swpaul
24381713Swpaul	shmfd = fp->f_data;
24481713Swpaul	fp->f_data = NULL;
24581713Swpaul	shm_drop(shmfd);
24681713Swpaul
24781713Swpaul	return (0);
24881713Swpaul}
24981713Swpaul
25081713Swpaulstatic int
25181713Swpaulshm_dotruncate(struct shmfd *shmfd, off_t length)
25281713Swpaul{
25381713Swpaul	vm_object_t object;
25481713Swpaul	vm_page_t m;
25581713Swpaul	vm_pindex_t nobjsize;
25681713Swpaul	vm_ooffset_t delta;
25781713Swpaul
25881713Swpaul	object = shmfd->shm_object;
25981713Swpaul	VM_OBJECT_LOCK(object);
26081713Swpaul	if (length == shmfd->shm_size) {
26181713Swpaul		VM_OBJECT_UNLOCK(object);
26281713Swpaul		return (0);
26381713Swpaul	}
26481713Swpaul	nobjsize = OFF_TO_IDX(length + PAGE_MASK);
26581713Swpaul
26681713Swpaul	/* Are we shrinking?  If so, trim the end. */
26781713Swpaul	if (length < shmfd->shm_size) {
26881713Swpaul		delta = ptoa(object->size - nobjsize);
26962672Swpaul
27062672Swpaul		/* Toss in memory pages. */
27162672Swpaul		if (nobjsize < object->size)
27262672Swpaul			vm_object_page_remove(object, nobjsize, object->size,
27362672Swpaul			    0);
27462672Swpaul
27562672Swpaul		/* Toss pages from swap. */
27662672Swpaul		if (object->type == OBJT_SWAP)
27762672Swpaul			swap_pager_freespace(object, nobjsize, delta);
27862672Swpaul
27962672Swpaul		/* Free the swap accounted for shm */
28062672Swpaul		swap_release_by_cred(delta, object->cred);
28162672Swpaul		object->charge -= delta;
28262672Swpaul
28362672Swpaul		/*
28450974Swpaul		 * If the last page is partially mapped, then zero out
28550974Swpaul		 * the garbage at the end of the page.  See comments
28650974Swpaul		 * in vnode_pager_setsize() for more details.
28750974Swpaul		 *
28850974Swpaul		 * XXXJHB: This handles in memory pages, but what about
28950974Swpaul		 * a page swapped out to disk?
29050974Swpaul		 */
29150974Swpaul		if ((length & PAGE_MASK) &&
29250974Swpaul		    (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
29350974Swpaul		    m->valid != 0) {
29450974Swpaul			int base = (int)length & PAGE_MASK;
29550974Swpaul			int size = PAGE_SIZE - base;
29650974Swpaul
29750974Swpaul			pmap_zero_page_area(m, base, size);
29850974Swpaul
29950974Swpaul			/*
30050974Swpaul			 * Update the valid bits to reflect the blocks that
30150974Swpaul			 * have been zeroed.  Some of these valid bits may
30250974Swpaul			 * have already been set.
30350974Swpaul			 */
30450974Swpaul			vm_page_set_valid(m, base, size);
30550974Swpaul
30650974Swpaul			/*
30750974Swpaul			 * Round "base" to the next block boundary so that the
30850974Swpaul			 * dirty bit for a partially zeroed block is not
30950974Swpaul			 * cleared.
31050974Swpaul			 */
31150974Swpaul			base = roundup2(base, DEV_BSIZE);
31250974Swpaul
31350974Swpaul			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
31450974Swpaul		} else if ((length & PAGE_MASK) &&
31550974Swpaul		    __predict_false(object->cache != NULL)) {
31650974Swpaul			vm_page_cache_free(object, OFF_TO_IDX(length),
31750974Swpaul			    nobjsize);
31850974Swpaul		}
31950974Swpaul	} else {
32050974Swpaul
32150974Swpaul		/* Attempt to reserve the swap */
32250974Swpaul		delta = ptoa(nobjsize - object->size);
32350974Swpaul		if (!swap_reserve_by_cred(delta, object->cred)) {
32450974Swpaul			VM_OBJECT_UNLOCK(object);
32550974Swpaul			return (ENOMEM);
32650974Swpaul		}
32750974Swpaul		object->charge += delta;
32850974Swpaul	}
32950974Swpaul	shmfd->shm_size = length;
33050974Swpaul	mtx_lock(&shm_timestamp_lock);
33150974Swpaul	vfs_timestamp(&shmfd->shm_ctime);
33250974Swpaul	shmfd->shm_mtime = shmfd->shm_ctime;
33350974Swpaul	mtx_unlock(&shm_timestamp_lock);
33450974Swpaul	object->size = nobjsize;
33550974Swpaul	VM_OBJECT_UNLOCK(object);
33650974Swpaul	return (0);
33750974Swpaul}
33850974Swpaul
33950974Swpaul/*
34050974Swpaul * shmfd object management including creation and reference counting
34150974Swpaul * routines.
34250974Swpaul */
34350974Swpaulstatic struct shmfd *
34450974Swpaulshm_alloc(struct ucred *ucred, mode_t mode)
34550974Swpaul{
34650974Swpaul	struct shmfd *shmfd;
34750974Swpaul
34850974Swpaul	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
34950974Swpaul	shmfd->shm_size = 0;
35050974Swpaul	shmfd->shm_uid = ucred->cr_uid;
35150974Swpaul	shmfd->shm_gid = ucred->cr_gid;
35250974Swpaul	shmfd->shm_mode = mode;
35350974Swpaul	shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
35450974Swpaul	    shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
35550974Swpaul	KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
35650974Swpaul	VM_OBJECT_LOCK(shmfd->shm_object);
35750974Swpaul	vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING);
35850974Swpaul	vm_object_set_flag(shmfd->shm_object, OBJ_NOSPLIT);
35950974Swpaul	VM_OBJECT_UNLOCK(shmfd->shm_object);
36050974Swpaul	vfs_timestamp(&shmfd->shm_birthtime);
36150974Swpaul	shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
36250974Swpaul	    shmfd->shm_birthtime;
36350974Swpaul	refcount_init(&shmfd->shm_refs, 1);
36450974Swpaul#ifdef MAC
36550974Swpaul	mac_posixshm_init(shmfd);
36650974Swpaul	mac_posixshm_create(ucred, shmfd);
36762672Swpaul#endif
36862672Swpaul
36950974Swpaul	return (shmfd);
37050974Swpaul}
37150974Swpaul
37250974Swpaulstatic struct shmfd *
37350974Swpaulshm_hold(struct shmfd *shmfd)
37450974Swpaul{
37550974Swpaul
37650974Swpaul	refcount_acquire(&shmfd->shm_refs);
37750974Swpaul	return (shmfd);
37850974Swpaul}
37950974Swpaul
38050974Swpaulstatic void
38150974Swpaulshm_drop(struct shmfd *shmfd)
38250974Swpaul{
38350974Swpaul
38450974Swpaul	if (refcount_release(&shmfd->shm_refs)) {
38550974Swpaul#ifdef MAC
38650974Swpaul		mac_posixshm_destroy(shmfd);
38750974Swpaul#endif
38850974Swpaul		vm_object_deallocate(shmfd->shm_object);
38950974Swpaul		free(shmfd, M_SHMFD);
39050974Swpaul	}
39150974Swpaul}
39250974Swpaul
39350974Swpaul/*
39450974Swpaul * Determine if the credentials have sufficient permissions for a
39550974Swpaul * specified combination of FREAD and FWRITE.
39650974Swpaul */
39750974Swpaulstatic int
39850974Swpaulshm_access(struct shmfd *shmfd, struct ucred *ucred, int flags)
39950974Swpaul{
40050974Swpaul	accmode_t accmode;
40150974Swpaul	int error;
40250974Swpaul
40350974Swpaul	accmode = 0;
40450974Swpaul	if (flags & FREAD)
40550974Swpaul		accmode |= VREAD;
40650974Swpaul	if (flags & FWRITE)
40750974Swpaul		accmode |= VWRITE;
40850974Swpaul	mtx_lock(&shm_timestamp_lock);
40950974Swpaul	error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid,
41050974Swpaul	    accmode, ucred, NULL);
41150974Swpaul	mtx_unlock(&shm_timestamp_lock);
41250974Swpaul	return (error);
41350974Swpaul}
41450974Swpaul
41550974Swpaul/*
41650974Swpaul * Dictionary management.  We maintain an in-kernel dictionary to map
41750974Swpaul * paths to shmfd objects.  We use the FNV hash on the path to store
41850974Swpaul * the mappings in a hash table.
41950974Swpaul */
42050974Swpaulstatic void
42150974Swpaulshm_dict_init(void *arg)
42250974Swpaul{
42372197Swpaul
42472197Swpaul	mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
42572197Swpaul	sx_init(&shm_dict_lock, "shm dictionary");
42672197Swpaul	shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
42772197Swpaul}
42872197SwpaulSYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL);
42972197Swpaul
43072197Swpaulstatic struct shmfd *
43172197Swpaulshm_lookup(char *path, Fnv32_t fnv)
43272197Swpaul{
43372197Swpaul	struct shm_mapping *map;
43472197Swpaul
43572197Swpaul	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
43672197Swpaul		if (map->sm_fnv != fnv)
43772197Swpaul			continue;
43872197Swpaul		if (strcmp(map->sm_path, path) == 0)
43972197Swpaul			return (map->sm_shmfd);
44072197Swpaul	}
44172197Swpaul
44272197Swpaul	return (NULL);
44372197Swpaul}
44472197Swpaul
44572197Swpaulstatic void
44672197Swpaulshm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd)
44772197Swpaul{
44872197Swpaul	struct shm_mapping *map;
44972197Swpaul
45072197Swpaul	map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK);
45172197Swpaul	map->sm_path = path;
45272197Swpaul	map->sm_fnv = fnv;
45372197Swpaul	map->sm_shmfd = shm_hold(shmfd);
45472197Swpaul	LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link);
45572197Swpaul}
45672197Swpaul
45772197Swpaulstatic int
45872197Swpaulshm_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
45972197Swpaul{
46072197Swpaul	struct shm_mapping *map;
46172197Swpaul	int error;
46272197Swpaul
46372197Swpaul	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
46472197Swpaul		if (map->sm_fnv != fnv)
46572197Swpaul			continue;
46672197Swpaul		if (strcmp(map->sm_path, path) == 0) {
46772197Swpaul#ifdef MAC
46872197Swpaul			error = mac_posixshm_check_unlink(ucred, map->sm_shmfd);
46972197Swpaul			if (error)
47072197Swpaul				return (error);
47172197Swpaul#endif
47272197Swpaul			error = shm_access(map->sm_shmfd, ucred,
47372197Swpaul			    FREAD | FWRITE);
47472197Swpaul			if (error)
47572197Swpaul				return (error);
47672197Swpaul			LIST_REMOVE(map, sm_link);
47772197Swpaul			shm_drop(map->sm_shmfd);
47872197Swpaul			free(map->sm_path, M_SHMFD);
47972197Swpaul			free(map, M_SHMFD);
48072197Swpaul			return (0);
48172197Swpaul		}
48272197Swpaul	}
48372197Swpaul
48472197Swpaul	return (ENOENT);
48572197Swpaul}
48672197Swpaul
48772197Swpaul/* System calls. */
48872197Swpaulint
48972197Swpaulshm_open(struct thread *td, struct shm_open_args *uap)
49050974Swpaul{
49150974Swpaul	struct filedesc *fdp;
49250974Swpaul	struct shmfd *shmfd;
49350974Swpaul	struct file *fp;
49450974Swpaul	char *path;
49562672Swpaul	Fnv32_t fnv;
49650974Swpaul	mode_t cmode;
49750974Swpaul	int fd, error;
49850974Swpaul
49962672Swpaul#ifdef CAPABILITY_MODE
50062672Swpaul	/*
50162672Swpaul	 * shm_open(2) is only allowed for anonymous objects.
50262672Swpaul	 */
50362672Swpaul	if (IN_CAPABILITY_MODE(td) && (uap->path != SHM_ANON))
50462672Swpaul		return (ECAPMODE);
50562672Swpaul#endif
50662672Swpaul
50762672Swpaul	if ((uap->flags & O_ACCMODE) != O_RDONLY &&
50862672Swpaul	    (uap->flags & O_ACCMODE) != O_RDWR)
50962672Swpaul		return (EINVAL);
51062672Swpaul
51162672Swpaul	if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0)
51262672Swpaul		return (EINVAL);
51362672Swpaul
51462672Swpaul	fdp = td->td_proc->p_fd;
51562672Swpaul	cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;
51662672Swpaul
51762672Swpaul	error = falloc(td, &fp, &fd, 0);
51850974Swpaul	if (error)
51950974Swpaul		return (error);
52050974Swpaul
52150974Swpaul	/* A SHM_ANON path pointer creates an anonymous object. */
52250974Swpaul	if (uap->path == SHM_ANON) {
52350974Swpaul		/* A read-only anonymous object is pointless. */
52450974Swpaul		if ((uap->flags & O_ACCMODE) == O_RDONLY) {
52550974Swpaul			fdclose(fdp, fp, fd, td);
52650974Swpaul			fdrop(fp, td);
52750974Swpaul			return (EINVAL);
52850974Swpaul		}
52950974Swpaul		shmfd = shm_alloc(td->td_ucred, cmode);
53050974Swpaul	} else {
53150974Swpaul		path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
53250974Swpaul		error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
53350974Swpaul
53450974Swpaul		/* Require paths to start with a '/' character. */
53550974Swpaul		if (error == 0 && path[0] != '/')
53650974Swpaul			error = EINVAL;
53750974Swpaul		if (error) {
53850974Swpaul			fdclose(fdp, fp, fd, td);
53950974Swpaul			fdrop(fp, td);
54050974Swpaul			free(path, M_SHMFD);
54150974Swpaul			return (error);
54250974Swpaul		}
54350974Swpaul
54450974Swpaul		fnv = fnv_32_str(path, FNV1_32_INIT);
54550974Swpaul		sx_xlock(&shm_dict_lock);
54650974Swpaul		shmfd = shm_lookup(path, fnv);
54750974Swpaul		if (shmfd == NULL) {
54850974Swpaul			/* Object does not yet exist, create it if requested. */
54950974Swpaul			if (uap->flags & O_CREAT) {
55050974Swpaul#ifdef MAC
55162672Swpaul				error = mac_posixshm_check_create(td->td_ucred,
55262672Swpaul				    path);
55362672Swpaul				if (error == 0) {
55462672Swpaul#endif
55562672Swpaul					shmfd = shm_alloc(td->td_ucred, cmode);
55662672Swpaul					shm_insert(path, fnv, shmfd);
55762672Swpaul#ifdef MAC
55850974Swpaul				}
55950974Swpaul#endif
56050974Swpaul			} else {
56150974Swpaul				free(path, M_SHMFD);
56250974Swpaul				error = ENOENT;
56350974Swpaul			}
56450974Swpaul		} else {
56550974Swpaul			/*
56650974Swpaul			 * Object already exists, obtain a new
56750974Swpaul			 * reference if requested and permitted.
56850974Swpaul			 */
56950974Swpaul			free(path, M_SHMFD);
57050974Swpaul			if ((uap->flags & (O_CREAT | O_EXCL)) ==
57150974Swpaul			    (O_CREAT | O_EXCL))
57250974Swpaul				error = EEXIST;
57350974Swpaul			else {
57450974Swpaul#ifdef MAC
57550974Swpaul				error = mac_posixshm_check_open(td->td_ucred,
57650974Swpaul				    shmfd, FFLAGS(uap->flags & O_ACCMODE));
57750974Swpaul				if (error == 0)
57850974Swpaul#endif
57950974Swpaul				error = shm_access(shmfd, td->td_ucred,
58050974Swpaul				    FFLAGS(uap->flags & O_ACCMODE));
58150974Swpaul			}
58264963Swpaul
58350974Swpaul			/*
58450974Swpaul			 * Truncate the file back to zero length if
58550974Swpaul			 * O_TRUNC was specified and the object was
58650974Swpaul			 * opened with read/write.
58762672Swpaul			 */
58862672Swpaul			if (error == 0 &&
58950974Swpaul			    (uap->flags & (O_ACCMODE | O_TRUNC)) ==
59050974Swpaul			    (O_RDWR | O_TRUNC)) {
59150974Swpaul#ifdef MAC
59250974Swpaul				error = mac_posixshm_check_truncate(
59350974Swpaul					td->td_ucred, fp->f_cred, shmfd);
59450974Swpaul				if (error == 0)
59550974Swpaul#endif
59650974Swpaul					shm_dotruncate(shmfd, 0);
59750974Swpaul			}
59850974Swpaul			if (error == 0)
59950974Swpaul				shm_hold(shmfd);
60050974Swpaul		}
60150974Swpaul		sx_xunlock(&shm_dict_lock);
60250974Swpaul
60350974Swpaul		if (error) {
60450974Swpaul			fdclose(fdp, fp, fd, td);
60550974Swpaul			fdrop(fp, td);
60650974Swpaul			return (error);
60750974Swpaul		}
60850974Swpaul	}
60962672Swpaul
61062672Swpaul	finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);
61162672Swpaul
61262672Swpaul	FILEDESC_XLOCK(fdp);
61362672Swpaul	if (fdp->fd_ofiles[fd] == fp)
61462672Swpaul		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
61562672Swpaul	FILEDESC_XUNLOCK(fdp);
61662672Swpaul	td->td_retval[0] = fd;
61762672Swpaul	fdrop(fp, td);
61850974Swpaul
61950974Swpaul	return (0);
62050974Swpaul}
62162672Swpaul
62250974Swpaulint
62350974Swpaulshm_unlink(struct thread *td, struct shm_unlink_args *uap)
62450974Swpaul{
62550974Swpaul	char *path;
62650974Swpaul	Fnv32_t fnv;
62762672Swpaul	int error;
62850974Swpaul
62950974Swpaul	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
63050974Swpaul	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
63150974Swpaul	if (error) {
63262672Swpaul		free(path, M_TEMP);
63350974Swpaul		return (error);
63450974Swpaul	}
63550974Swpaul
63650974Swpaul	fnv = fnv_32_str(path, FNV1_32_INIT);
63762672Swpaul	sx_xlock(&shm_dict_lock);
63862672Swpaul	error = shm_remove(path, fnv, td->td_ucred);
63962672Swpaul	sx_xunlock(&shm_dict_lock);
64062672Swpaul	free(path, M_TEMP);
64162672Swpaul
64250974Swpaul	return (error);
64350974Swpaul}
64450974Swpaul
64550974Swpaul/*
64650974Swpaul * mmap() helper to validate mmap() requests against shm object state
64762672Swpaul * and give mmap() the vm_object to use for the mapping.
64862672Swpaul */
64962672Swpaulint
65062672Swpaulshm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
65162672Swpaul    vm_object_t *obj)
65272084Sphk{
65362672Swpaul
65462672Swpaul	/*
65562672Swpaul	 * XXXRW: This validation is probably insufficient, and subject to
65662672Swpaul	 * sign errors.  It should be fixed.
65762672Swpaul	 */
65862672Swpaul	if (foff >= shmfd->shm_size ||
65962672Swpaul	    foff + objsize > round_page(shmfd->shm_size))
66062672Swpaul		return (EINVAL);
66162672Swpaul
66262672Swpaul	mtx_lock(&shm_timestamp_lock);
66362672Swpaul	vfs_timestamp(&shmfd->shm_atime);
66462672Swpaul	mtx_unlock(&shm_timestamp_lock);
66562672Swpaul	vm_object_reference(shmfd->shm_object);
66662672Swpaul	*obj = shmfd->shm_object;
66762672Swpaul	return (0);
66862672Swpaul}
66962672Swpaul
67062672Swpaulstatic int
67162672Swpaulshm_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
67262672Swpaul    struct thread *td)
67362672Swpaul{
67462672Swpaul	struct shmfd *shmfd;
67562672Swpaul	int error;
67662672Swpaul
67762672Swpaul	error = 0;
67862672Swpaul	shmfd = fp->f_data;
67962672Swpaul	mtx_lock(&shm_timestamp_lock);
68062672Swpaul	/*
68162672Swpaul	 * SUSv4 says that x bits of permission need not be affected.
68262672Swpaul	 * Be consistent with our shm_open there.
68362672Swpaul	 */
68462672Swpaul#ifdef MAC
68562672Swpaul	error = mac_posixshm_check_setmode(active_cred, shmfd, mode);
68662672Swpaul	if (error != 0)
68762672Swpaul		goto out;
68850974Swpaul#endif
68950974Swpaul	error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid,
69050974Swpaul	    shmfd->shm_gid, VADMIN, active_cred, NULL);
69150974Swpaul	if (error != 0)
69250974Swpaul		goto out;
69350974Swpaul	shmfd->shm_mode = mode & ACCESSPERMS;
69472084Sphkout:
69550974Swpaul	mtx_unlock(&shm_timestamp_lock);
69650974Swpaul	return (error);
69762672Swpaul}
69850974Swpaul
69950974Swpaulstatic int
70050974Swpaulshm_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
70150974Swpaul    struct thread *td)
70250974Swpaul{
70350974Swpaul	struct shmfd *shmfd;
70450974Swpaul	int error;
70550974Swpaul
70650974Swpaul	error = 0;
70750974Swpaul	shmfd = fp->f_data;
70850974Swpaul	mtx_lock(&shm_timestamp_lock);
70950974Swpaul#ifdef MAC
71050974Swpaul	error = mac_posixshm_check_setowner(active_cred, shmfd, uid, gid);
71150974Swpaul	if (error != 0)
71250974Swpaul		goto out;
71350974Swpaul#endif
71450974Swpaul	if (uid == (uid_t)-1)
71550974Swpaul		uid = shmfd->shm_uid;
71650974Swpaul	if (gid == (gid_t)-1)
71750974Swpaul                 gid = shmfd->shm_gid;
71850974Swpaul	if (((uid != shmfd->shm_uid && uid != active_cred->cr_uid) ||
71950974Swpaul	    (gid != shmfd->shm_gid && !groupmember(gid, active_cred))) &&
72050974Swpaul	    (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0)))
72150974Swpaul		goto out;
72250974Swpaul	shmfd->shm_uid = uid;
72350974Swpaul	shmfd->shm_gid = gid;
72472813Swpaulout:
72572813Swpaul	mtx_unlock(&shm_timestamp_lock);
72672813Swpaul	return (error);
72772813Swpaul}
72872813Swpaul