uipc_shm.c revision 225344
/*-
 * Copyright (c) 2006, 2011 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Support for shared swap-backed anonymous memory objects via
 * shm_open(2) and shm_unlink(2).  While most of the implementation is
 * here, vm_mmap.c contains mapping logic changes.
 *
 * TODO:
 *
 * (1) Need to export data to a userland tool via a sysctl.  Should ipcs(1)
 *     and ipcrm(1) be expanded or should new tools to manage both POSIX
 *     kernel semaphores and POSIX shared memory be written?
 *
 * (2) Add support for this file type to fstat(1).
 *
 * (3) Resource limits?  Does this need its own resource limits or are the
 *     existing limits in mmap(2) sufficient?
 *
 * (4) Partial page truncation.  vnode_pager_setsize() will zero any parts
 *     of a partially mapped page as a result of ftruncate(2)/truncate(2).
 *     We can do the same (with the same pmap evil), but do we need to
 *     worry about the bits on disk if the page is swapped out or will the
 *     swapper zero the parts of a page that are invalid if the page is
 *     swapped back in for us?
4950974Swpaul */ 5050974Swpaul 5150974Swpaul#include <sys/cdefs.h> 5250974Swpaul__FBSDID("$FreeBSD: head/sys/kern/uipc_shm.c 225344 2011-09-02 17:40:39Z rwatson $"); 5350974Swpaul 5450974Swpaul#include "opt_capsicum.h" 5550974Swpaul 5650974Swpaul#include <sys/param.h> 5750974Swpaul#include <sys/capability.h> 5850974Swpaul#include <sys/fcntl.h> 5950974Swpaul#include <sys/file.h> 6050974Swpaul#include <sys/filedesc.h> 6150974Swpaul#include <sys/fnv_hash.h> 6250974Swpaul#include <sys/kernel.h> 6350974Swpaul#include <sys/lock.h> 6450974Swpaul#include <sys/malloc.h> 6550974Swpaul#include <sys/mman.h> 6650974Swpaul#include <sys/mutex.h> 6787059Sluigi#include <sys/priv.h> 6850974Swpaul#include <sys/proc.h> 6950974Swpaul#include <sys/refcount.h> 7050974Swpaul#include <sys/resourcevar.h> 7150974Swpaul#include <sys/stat.h> 7250974Swpaul#include <sys/sysctl.h> 7350974Swpaul#include <sys/sysproto.h> 7487390Sjhay#include <sys/systm.h> 7587390Sjhay#include <sys/sx.h> 7650974Swpaul#include <sys/time.h> 7750974Swpaul#include <sys/vnode.h> 7850974Swpaul 7950974Swpaul#include <security/mac/mac_framework.h> 8050974Swpaul 8150974Swpaul#include <vm/vm.h> 8250974Swpaul#include <vm/vm_param.h> 8350974Swpaul#include <vm/pmap.h> 8450974Swpaul#include <vm/vm_map.h> 8550974Swpaul#include <vm/vm_object.h> 8650974Swpaul#include <vm/vm_page.h> 8750974Swpaul#include <vm/vm_pager.h> 8850974Swpaul#include <vm/swap_pager.h> 8950974Swpaul 9050974Swpaulstruct shm_mapping { 9150974Swpaul char *sm_path; 9250974Swpaul Fnv32_t sm_fnv; 9350974Swpaul struct shmfd *sm_shmfd; 9450974Swpaul LIST_ENTRY(shm_mapping) sm_link; 9550974Swpaul}; 9659758Speter 9759758Speterstatic MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor"); 9851089Speterstatic LIST_HEAD(, shm_mapping) *shm_dictionary; 9950974Swpaulstatic struct sx shm_dict_lock; 10050974Swpaulstatic struct mtx shm_timestamp_lock; 10150974Swpaulstatic u_long shm_hash; 10250974Swpaul 10350974Swpaul#define SHM_HASH(fnv) (&shm_dictionary[(fnv) & 
shm_hash]) 10450974Swpaul 10550974Swpaulstatic int shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags); 10650974Swpaulstatic struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode); 10750974Swpaulstatic void shm_dict_init(void *arg); 10850974Swpaulstatic void shm_drop(struct shmfd *shmfd); 10950974Swpaulstatic struct shmfd *shm_hold(struct shmfd *shmfd); 11050974Swpaulstatic void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd); 11150974Swpaulstatic struct shmfd *shm_lookup(char *path, Fnv32_t fnv); 11262672Swpaulstatic int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred); 11350974Swpaulstatic int shm_dotruncate(struct shmfd *shmfd, off_t length); 11450974Swpaul 11550974Swpaulstatic fo_rdwr_t shm_read; 11650974Swpaulstatic fo_rdwr_t shm_write; 11750974Swpaulstatic fo_truncate_t shm_truncate; 11850974Swpaulstatic fo_ioctl_t shm_ioctl; 11950974Swpaulstatic fo_poll_t shm_poll; 12050974Swpaulstatic fo_kqfilter_t shm_kqfilter; 12150974Swpaulstatic fo_stat_t shm_stat; 12250974Swpaulstatic fo_close_t shm_close; 12350974Swpaulstatic fo_chmod_t shm_chmod; 12450974Swpaulstatic fo_chown_t shm_chown; 12550974Swpaul 12650974Swpaul/* File descriptor operations. 
*/ 12750974Swpaulstatic struct fileops shm_ops = { 12850974Swpaul .fo_read = shm_read, 12950974Swpaul .fo_write = shm_write, 13050974Swpaul .fo_truncate = shm_truncate, 13150974Swpaul .fo_ioctl = shm_ioctl, 13250974Swpaul .fo_poll = shm_poll, 13350974Swpaul .fo_kqfilter = shm_kqfilter, 13450974Swpaul .fo_stat = shm_stat, 13550974Swpaul .fo_close = shm_close, 13650974Swpaul .fo_chmod = shm_chmod, 13750974Swpaul .fo_chown = shm_chown, 13850974Swpaul .fo_flags = DFLAG_PASSABLE 13962672Swpaul}; 14050974Swpaul 14150974SwpaulFEATURE(posix_shm, "POSIX shared memory"); 14250974Swpaul 14350974Swpaulstatic int 14450974Swpaulshm_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 14550974Swpaul int flags, struct thread *td) 14672197Swpaul{ 14772197Swpaul 14872197Swpaul return (EOPNOTSUPP); 14972197Swpaul} 15072197Swpaul 15172197Swpaulstatic int 15250974Swpaulshm_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 15350974Swpaul int flags, struct thread *td) 15450974Swpaul{ 15550974Swpaul 15662672Swpaul return (EOPNOTSUPP); 15762672Swpaul} 15862672Swpaul 15950974Swpaulstatic int 16050974Swpaulshm_truncate(struct file *fp, off_t length, struct ucred *active_cred, 16150974Swpaul struct thread *td) 16250974Swpaul{ 16381713Swpaul struct shmfd *shmfd; 16481713Swpaul#ifdef MAC 16581713Swpaul int error; 16681713Swpaul#endif 16781713Swpaul 16881713Swpaul shmfd = fp->f_data; 16950974Swpaul#ifdef MAC 17050974Swpaul error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd); 17150974Swpaul if (error) 17250974Swpaul return (error); 17351030Swpaul#endif 17451030Swpaul return (shm_dotruncate(shmfd, length)); 17550974Swpaul} 17650974Swpaul 17750974Swpaulstatic int 17850974Swpaulshm_ioctl(struct file *fp, u_long com, void *data, 17950974Swpaul struct ucred *active_cred, struct thread *td) 18050974Swpaul{ 18150974Swpaul 18250974Swpaul return (EOPNOTSUPP); 18350974Swpaul} 18450974Swpaul 18550974Swpaulstatic int 18650974Swpaulshm_poll(struct file *fp, int 
events, struct ucred *active_cred, 18750974Swpaul struct thread *td) 18850974Swpaul{ 18950974Swpaul 19050974Swpaul return (EOPNOTSUPP); 19150974Swpaul} 19250974Swpaul 19350974Swpaulstatic int 19450974Swpaulshm_kqfilter(struct file *fp, struct knote *kn) 19550974Swpaul{ 19650974Swpaul 19751455Swpaul return (EOPNOTSUPP); 19850974Swpaul} 19950974Swpaul 20050974Swpaulstatic int 20150974Swpaulshm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, 20250974Swpaul struct thread *td) 20350974Swpaul{ 20487059Sluigi struct shmfd *shmfd; 20587059Sluigi#ifdef MAC 20687059Sluigi int error; 20787059Sluigi#endif 20887059Sluigi 20987059Sluigi shmfd = fp->f_data; 21051533Swpaul 21151473Swpaul#ifdef MAC 21250974Swpaul error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd); 21350974Swpaul if (error) 21450974Swpaul return (error); 21550974Swpaul#endif 21650974Swpaul 21750974Swpaul /* 21850974Swpaul * Attempt to return sanish values for fstat() on a memory file 21950974Swpaul * descriptor. 
22050974Swpaul */ 22150974Swpaul bzero(sb, sizeof(*sb)); 22250974Swpaul sb->st_blksize = PAGE_SIZE; 22350974Swpaul sb->st_size = shmfd->shm_size; 22450974Swpaul sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize; 22550974Swpaul mtx_lock(&shm_timestamp_lock); 22650974Swpaul sb->st_atim = shmfd->shm_atime; 22781713Swpaul sb->st_ctim = shmfd->shm_ctime; 22881713Swpaul sb->st_mtim = shmfd->shm_mtime; 22981713Swpaul sb->st_birthtim = shmfd->shm_birthtime; 23081713Swpaul sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */ 23181713Swpaul sb->st_uid = shmfd->shm_uid; 23281713Swpaul sb->st_gid = shmfd->shm_gid; 23381713Swpaul mtx_unlock(&shm_timestamp_lock); 23481713Swpaul 23581713Swpaul return (0); 23681713Swpaul} 23781713Swpaul 23881713Swpaulstatic int 23981713Swpaulshm_close(struct file *fp, struct thread *td) 24081713Swpaul{ 24181713Swpaul struct shmfd *shmfd; 24281713Swpaul 24381713Swpaul shmfd = fp->f_data; 24481713Swpaul fp->f_data = NULL; 24581713Swpaul shm_drop(shmfd); 24681713Swpaul 24781713Swpaul return (0); 24881713Swpaul} 24981713Swpaul 25081713Swpaulstatic int 25181713Swpaulshm_dotruncate(struct shmfd *shmfd, off_t length) 25281713Swpaul{ 25381713Swpaul vm_object_t object; 25481713Swpaul vm_page_t m; 25581713Swpaul vm_pindex_t nobjsize; 25681713Swpaul vm_ooffset_t delta; 25781713Swpaul 25881713Swpaul object = shmfd->shm_object; 25981713Swpaul VM_OBJECT_LOCK(object); 26081713Swpaul if (length == shmfd->shm_size) { 26181713Swpaul VM_OBJECT_UNLOCK(object); 26281713Swpaul return (0); 26381713Swpaul } 26481713Swpaul nobjsize = OFF_TO_IDX(length + PAGE_MASK); 26581713Swpaul 26681713Swpaul /* Are we shrinking? If so, trim the end. */ 26781713Swpaul if (length < shmfd->shm_size) { 26881713Swpaul delta = ptoa(object->size - nobjsize); 26962672Swpaul 27062672Swpaul /* Toss in memory pages. 
*/ 27162672Swpaul if (nobjsize < object->size) 27262672Swpaul vm_object_page_remove(object, nobjsize, object->size, 27362672Swpaul 0); 27462672Swpaul 27562672Swpaul /* Toss pages from swap. */ 27662672Swpaul if (object->type == OBJT_SWAP) 27762672Swpaul swap_pager_freespace(object, nobjsize, delta); 27862672Swpaul 27962672Swpaul /* Free the swap accounted for shm */ 28062672Swpaul swap_release_by_cred(delta, object->cred); 28162672Swpaul object->charge -= delta; 28262672Swpaul 28362672Swpaul /* 28450974Swpaul * If the last page is partially mapped, then zero out 28550974Swpaul * the garbage at the end of the page. See comments 28650974Swpaul * in vnode_pager_setsize() for more details. 28750974Swpaul * 28850974Swpaul * XXXJHB: This handles in memory pages, but what about 28950974Swpaul * a page swapped out to disk? 29050974Swpaul */ 29150974Swpaul if ((length & PAGE_MASK) && 29250974Swpaul (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL && 29350974Swpaul m->valid != 0) { 29450974Swpaul int base = (int)length & PAGE_MASK; 29550974Swpaul int size = PAGE_SIZE - base; 29650974Swpaul 29750974Swpaul pmap_zero_page_area(m, base, size); 29850974Swpaul 29950974Swpaul /* 30050974Swpaul * Update the valid bits to reflect the blocks that 30150974Swpaul * have been zeroed. Some of these valid bits may 30250974Swpaul * have already been set. 30350974Swpaul */ 30450974Swpaul vm_page_set_valid(m, base, size); 30550974Swpaul 30650974Swpaul /* 30750974Swpaul * Round "base" to the next block boundary so that the 30850974Swpaul * dirty bit for a partially zeroed block is not 30950974Swpaul * cleared. 
31050974Swpaul */ 31150974Swpaul base = roundup2(base, DEV_BSIZE); 31250974Swpaul 31350974Swpaul vm_page_clear_dirty(m, base, PAGE_SIZE - base); 31450974Swpaul } else if ((length & PAGE_MASK) && 31550974Swpaul __predict_false(object->cache != NULL)) { 31650974Swpaul vm_page_cache_free(object, OFF_TO_IDX(length), 31750974Swpaul nobjsize); 31850974Swpaul } 31950974Swpaul } else { 32050974Swpaul 32150974Swpaul /* Attempt to reserve the swap */ 32250974Swpaul delta = ptoa(nobjsize - object->size); 32350974Swpaul if (!swap_reserve_by_cred(delta, object->cred)) { 32450974Swpaul VM_OBJECT_UNLOCK(object); 32550974Swpaul return (ENOMEM); 32650974Swpaul } 32750974Swpaul object->charge += delta; 32850974Swpaul } 32950974Swpaul shmfd->shm_size = length; 33050974Swpaul mtx_lock(&shm_timestamp_lock); 33150974Swpaul vfs_timestamp(&shmfd->shm_ctime); 33250974Swpaul shmfd->shm_mtime = shmfd->shm_ctime; 33350974Swpaul mtx_unlock(&shm_timestamp_lock); 33450974Swpaul object->size = nobjsize; 33550974Swpaul VM_OBJECT_UNLOCK(object); 33650974Swpaul return (0); 33750974Swpaul} 33850974Swpaul 33950974Swpaul/* 34050974Swpaul * shmfd object management including creation and reference counting 34150974Swpaul * routines. 
34250974Swpaul */ 34350974Swpaulstatic struct shmfd * 34450974Swpaulshm_alloc(struct ucred *ucred, mode_t mode) 34550974Swpaul{ 34650974Swpaul struct shmfd *shmfd; 34750974Swpaul 34850974Swpaul shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO); 34950974Swpaul shmfd->shm_size = 0; 35050974Swpaul shmfd->shm_uid = ucred->cr_uid; 35150974Swpaul shmfd->shm_gid = ucred->cr_gid; 35250974Swpaul shmfd->shm_mode = mode; 35350974Swpaul shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL, 35450974Swpaul shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred); 35550974Swpaul KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate")); 35650974Swpaul VM_OBJECT_LOCK(shmfd->shm_object); 35750974Swpaul vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING); 35850974Swpaul vm_object_set_flag(shmfd->shm_object, OBJ_NOSPLIT); 35950974Swpaul VM_OBJECT_UNLOCK(shmfd->shm_object); 36050974Swpaul vfs_timestamp(&shmfd->shm_birthtime); 36150974Swpaul shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime = 36250974Swpaul shmfd->shm_birthtime; 36350974Swpaul refcount_init(&shmfd->shm_refs, 1); 36450974Swpaul#ifdef MAC 36550974Swpaul mac_posixshm_init(shmfd); 36650974Swpaul mac_posixshm_create(ucred, shmfd); 36762672Swpaul#endif 36862672Swpaul 36950974Swpaul return (shmfd); 37050974Swpaul} 37150974Swpaul 37250974Swpaulstatic struct shmfd * 37350974Swpaulshm_hold(struct shmfd *shmfd) 37450974Swpaul{ 37550974Swpaul 37650974Swpaul refcount_acquire(&shmfd->shm_refs); 37750974Swpaul return (shmfd); 37850974Swpaul} 37950974Swpaul 38050974Swpaulstatic void 38150974Swpaulshm_drop(struct shmfd *shmfd) 38250974Swpaul{ 38350974Swpaul 38450974Swpaul if (refcount_release(&shmfd->shm_refs)) { 38550974Swpaul#ifdef MAC 38650974Swpaul mac_posixshm_destroy(shmfd); 38750974Swpaul#endif 38850974Swpaul vm_object_deallocate(shmfd->shm_object); 38950974Swpaul free(shmfd, M_SHMFD); 39050974Swpaul } 39150974Swpaul} 39250974Swpaul 39350974Swpaul/* 39450974Swpaul * Determine if the credentials have 
sufficient permissions for a 39550974Swpaul * specified combination of FREAD and FWRITE. 39650974Swpaul */ 39750974Swpaulstatic int 39850974Swpaulshm_access(struct shmfd *shmfd, struct ucred *ucred, int flags) 39950974Swpaul{ 40050974Swpaul accmode_t accmode; 40150974Swpaul int error; 40250974Swpaul 40350974Swpaul accmode = 0; 40450974Swpaul if (flags & FREAD) 40550974Swpaul accmode |= VREAD; 40650974Swpaul if (flags & FWRITE) 40750974Swpaul accmode |= VWRITE; 40850974Swpaul mtx_lock(&shm_timestamp_lock); 40950974Swpaul error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid, 41050974Swpaul accmode, ucred, NULL); 41150974Swpaul mtx_unlock(&shm_timestamp_lock); 41250974Swpaul return (error); 41350974Swpaul} 41450974Swpaul 41550974Swpaul/* 41650974Swpaul * Dictionary management. We maintain an in-kernel dictionary to map 41750974Swpaul * paths to shmfd objects. We use the FNV hash on the path to store 41850974Swpaul * the mappings in a hash table. 41950974Swpaul */ 42050974Swpaulstatic void 42150974Swpaulshm_dict_init(void *arg) 42250974Swpaul{ 42372197Swpaul 42472197Swpaul mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF); 42572197Swpaul sx_init(&shm_dict_lock, "shm dictionary"); 42672197Swpaul shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash); 42772197Swpaul} 42872197SwpaulSYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL); 42972197Swpaul 43072197Swpaulstatic struct shmfd * 43172197Swpaulshm_lookup(char *path, Fnv32_t fnv) 43272197Swpaul{ 43372197Swpaul struct shm_mapping *map; 43472197Swpaul 43572197Swpaul LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { 43672197Swpaul if (map->sm_fnv != fnv) 43772197Swpaul continue; 43872197Swpaul if (strcmp(map->sm_path, path) == 0) 43972197Swpaul return (map->sm_shmfd); 44072197Swpaul } 44172197Swpaul 44272197Swpaul return (NULL); 44372197Swpaul} 44472197Swpaul 44572197Swpaulstatic void 44672197Swpaulshm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd) 44772197Swpaul{ 
44872197Swpaul struct shm_mapping *map; 44972197Swpaul 45072197Swpaul map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK); 45172197Swpaul map->sm_path = path; 45272197Swpaul map->sm_fnv = fnv; 45372197Swpaul map->sm_shmfd = shm_hold(shmfd); 45472197Swpaul LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link); 45572197Swpaul} 45672197Swpaul 45772197Swpaulstatic int 45872197Swpaulshm_remove(char *path, Fnv32_t fnv, struct ucred *ucred) 45972197Swpaul{ 46072197Swpaul struct shm_mapping *map; 46172197Swpaul int error; 46272197Swpaul 46372197Swpaul LIST_FOREACH(map, SHM_HASH(fnv), sm_link) { 46472197Swpaul if (map->sm_fnv != fnv) 46572197Swpaul continue; 46672197Swpaul if (strcmp(map->sm_path, path) == 0) { 46772197Swpaul#ifdef MAC 46872197Swpaul error = mac_posixshm_check_unlink(ucred, map->sm_shmfd); 46972197Swpaul if (error) 47072197Swpaul return (error); 47172197Swpaul#endif 47272197Swpaul error = shm_access(map->sm_shmfd, ucred, 47372197Swpaul FREAD | FWRITE); 47472197Swpaul if (error) 47572197Swpaul return (error); 47672197Swpaul LIST_REMOVE(map, sm_link); 47772197Swpaul shm_drop(map->sm_shmfd); 47872197Swpaul free(map->sm_path, M_SHMFD); 47972197Swpaul free(map, M_SHMFD); 48072197Swpaul return (0); 48172197Swpaul } 48272197Swpaul } 48372197Swpaul 48472197Swpaul return (ENOENT); 48572197Swpaul} 48672197Swpaul 48772197Swpaul/* System calls. */ 48872197Swpaulint 48972197Swpaulshm_open(struct thread *td, struct shm_open_args *uap) 49050974Swpaul{ 49150974Swpaul struct filedesc *fdp; 49250974Swpaul struct shmfd *shmfd; 49350974Swpaul struct file *fp; 49450974Swpaul char *path; 49562672Swpaul Fnv32_t fnv; 49650974Swpaul mode_t cmode; 49750974Swpaul int fd, error; 49850974Swpaul 49962672Swpaul#ifdef CAPABILITY_MODE 50062672Swpaul /* 50162672Swpaul * shm_open(2) is only allowed for anonymous objects. 
50262672Swpaul */ 50362672Swpaul if (IN_CAPABILITY_MODE(td) && (uap->path != SHM_ANON)) 50462672Swpaul return (ECAPMODE); 50562672Swpaul#endif 50662672Swpaul 50762672Swpaul if ((uap->flags & O_ACCMODE) != O_RDONLY && 50862672Swpaul (uap->flags & O_ACCMODE) != O_RDWR) 50962672Swpaul return (EINVAL); 51062672Swpaul 51162672Swpaul if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0) 51262672Swpaul return (EINVAL); 51362672Swpaul 51462672Swpaul fdp = td->td_proc->p_fd; 51562672Swpaul cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS; 51662672Swpaul 51762672Swpaul error = falloc(td, &fp, &fd, 0); 51850974Swpaul if (error) 51950974Swpaul return (error); 52050974Swpaul 52150974Swpaul /* A SHM_ANON path pointer creates an anonymous object. */ 52250974Swpaul if (uap->path == SHM_ANON) { 52350974Swpaul /* A read-only anonymous object is pointless. */ 52450974Swpaul if ((uap->flags & O_ACCMODE) == O_RDONLY) { 52550974Swpaul fdclose(fdp, fp, fd, td); 52650974Swpaul fdrop(fp, td); 52750974Swpaul return (EINVAL); 52850974Swpaul } 52950974Swpaul shmfd = shm_alloc(td->td_ucred, cmode); 53050974Swpaul } else { 53150974Swpaul path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK); 53250974Swpaul error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 53350974Swpaul 53450974Swpaul /* Require paths to start with a '/' character. */ 53550974Swpaul if (error == 0 && path[0] != '/') 53650974Swpaul error = EINVAL; 53750974Swpaul if (error) { 53850974Swpaul fdclose(fdp, fp, fd, td); 53950974Swpaul fdrop(fp, td); 54050974Swpaul free(path, M_SHMFD); 54150974Swpaul return (error); 54250974Swpaul } 54350974Swpaul 54450974Swpaul fnv = fnv_32_str(path, FNV1_32_INIT); 54550974Swpaul sx_xlock(&shm_dict_lock); 54650974Swpaul shmfd = shm_lookup(path, fnv); 54750974Swpaul if (shmfd == NULL) { 54850974Swpaul /* Object does not yet exist, create it if requested. 
*/ 54950974Swpaul if (uap->flags & O_CREAT) { 55050974Swpaul#ifdef MAC 55162672Swpaul error = mac_posixshm_check_create(td->td_ucred, 55262672Swpaul path); 55362672Swpaul if (error == 0) { 55462672Swpaul#endif 55562672Swpaul shmfd = shm_alloc(td->td_ucred, cmode); 55662672Swpaul shm_insert(path, fnv, shmfd); 55762672Swpaul#ifdef MAC 55850974Swpaul } 55950974Swpaul#endif 56050974Swpaul } else { 56150974Swpaul free(path, M_SHMFD); 56250974Swpaul error = ENOENT; 56350974Swpaul } 56450974Swpaul } else { 56550974Swpaul /* 56650974Swpaul * Object already exists, obtain a new 56750974Swpaul * reference if requested and permitted. 56850974Swpaul */ 56950974Swpaul free(path, M_SHMFD); 57050974Swpaul if ((uap->flags & (O_CREAT | O_EXCL)) == 57150974Swpaul (O_CREAT | O_EXCL)) 57250974Swpaul error = EEXIST; 57350974Swpaul else { 57450974Swpaul#ifdef MAC 57550974Swpaul error = mac_posixshm_check_open(td->td_ucred, 57650974Swpaul shmfd, FFLAGS(uap->flags & O_ACCMODE)); 57750974Swpaul if (error == 0) 57850974Swpaul#endif 57950974Swpaul error = shm_access(shmfd, td->td_ucred, 58050974Swpaul FFLAGS(uap->flags & O_ACCMODE)); 58150974Swpaul } 58264963Swpaul 58350974Swpaul /* 58450974Swpaul * Truncate the file back to zero length if 58550974Swpaul * O_TRUNC was specified and the object was 58650974Swpaul * opened with read/write. 
58762672Swpaul */ 58862672Swpaul if (error == 0 && 58950974Swpaul (uap->flags & (O_ACCMODE | O_TRUNC)) == 59050974Swpaul (O_RDWR | O_TRUNC)) { 59150974Swpaul#ifdef MAC 59250974Swpaul error = mac_posixshm_check_truncate( 59350974Swpaul td->td_ucred, fp->f_cred, shmfd); 59450974Swpaul if (error == 0) 59550974Swpaul#endif 59650974Swpaul shm_dotruncate(shmfd, 0); 59750974Swpaul } 59850974Swpaul if (error == 0) 59950974Swpaul shm_hold(shmfd); 60050974Swpaul } 60150974Swpaul sx_xunlock(&shm_dict_lock); 60250974Swpaul 60350974Swpaul if (error) { 60450974Swpaul fdclose(fdp, fp, fd, td); 60550974Swpaul fdrop(fp, td); 60650974Swpaul return (error); 60750974Swpaul } 60850974Swpaul } 60962672Swpaul 61062672Swpaul finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); 61162672Swpaul 61262672Swpaul FILEDESC_XLOCK(fdp); 61362672Swpaul if (fdp->fd_ofiles[fd] == fp) 61462672Swpaul fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 61562672Swpaul FILEDESC_XUNLOCK(fdp); 61662672Swpaul td->td_retval[0] = fd; 61762672Swpaul fdrop(fp, td); 61850974Swpaul 61950974Swpaul return (0); 62050974Swpaul} 62162672Swpaul 62250974Swpaulint 62350974Swpaulshm_unlink(struct thread *td, struct shm_unlink_args *uap) 62450974Swpaul{ 62550974Swpaul char *path; 62650974Swpaul Fnv32_t fnv; 62762672Swpaul int error; 62850974Swpaul 62950974Swpaul path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 63050974Swpaul error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 63150974Swpaul if (error) { 63262672Swpaul free(path, M_TEMP); 63350974Swpaul return (error); 63450974Swpaul } 63550974Swpaul 63650974Swpaul fnv = fnv_32_str(path, FNV1_32_INIT); 63762672Swpaul sx_xlock(&shm_dict_lock); 63862672Swpaul error = shm_remove(path, fnv, td->td_ucred); 63962672Swpaul sx_xunlock(&shm_dict_lock); 64062672Swpaul free(path, M_TEMP); 64162672Swpaul 64250974Swpaul return (error); 64350974Swpaul} 64450974Swpaul 64550974Swpaul/* 64650974Swpaul * mmap() helper to validate mmap() requests against shm object state 64762672Swpaul * and 
give mmap() the vm_object to use for the mapping. 64862672Swpaul */ 64962672Swpaulint 65062672Swpaulshm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff, 65162672Swpaul vm_object_t *obj) 65272084Sphk{ 65362672Swpaul 65462672Swpaul /* 65562672Swpaul * XXXRW: This validation is probably insufficient, and subject to 65662672Swpaul * sign errors. It should be fixed. 65762672Swpaul */ 65862672Swpaul if (foff >= shmfd->shm_size || 65962672Swpaul foff + objsize > round_page(shmfd->shm_size)) 66062672Swpaul return (EINVAL); 66162672Swpaul 66262672Swpaul mtx_lock(&shm_timestamp_lock); 66362672Swpaul vfs_timestamp(&shmfd->shm_atime); 66462672Swpaul mtx_unlock(&shm_timestamp_lock); 66562672Swpaul vm_object_reference(shmfd->shm_object); 66662672Swpaul *obj = shmfd->shm_object; 66762672Swpaul return (0); 66862672Swpaul} 66962672Swpaul 67062672Swpaulstatic int 67162672Swpaulshm_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, 67262672Swpaul struct thread *td) 67362672Swpaul{ 67462672Swpaul struct shmfd *shmfd; 67562672Swpaul int error; 67662672Swpaul 67762672Swpaul error = 0; 67862672Swpaul shmfd = fp->f_data; 67962672Swpaul mtx_lock(&shm_timestamp_lock); 68062672Swpaul /* 68162672Swpaul * SUSv4 says that x bits of permission need not be affected. 68262672Swpaul * Be consistent with our shm_open there. 
68362672Swpaul */ 68462672Swpaul#ifdef MAC 68562672Swpaul error = mac_posixshm_check_setmode(active_cred, shmfd, mode); 68662672Swpaul if (error != 0) 68762672Swpaul goto out; 68850974Swpaul#endif 68950974Swpaul error = vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, 69050974Swpaul shmfd->shm_gid, VADMIN, active_cred, NULL); 69150974Swpaul if (error != 0) 69250974Swpaul goto out; 69350974Swpaul shmfd->shm_mode = mode & ACCESSPERMS; 69472084Sphkout: 69550974Swpaul mtx_unlock(&shm_timestamp_lock); 69650974Swpaul return (error); 69762672Swpaul} 69850974Swpaul 69950974Swpaulstatic int 70050974Swpaulshm_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, 70150974Swpaul struct thread *td) 70250974Swpaul{ 70350974Swpaul struct shmfd *shmfd; 70450974Swpaul int error; 70550974Swpaul 70650974Swpaul error = 0; 70750974Swpaul shmfd = fp->f_data; 70850974Swpaul mtx_lock(&shm_timestamp_lock); 70950974Swpaul#ifdef MAC 71050974Swpaul error = mac_posixshm_check_setowner(active_cred, shmfd, uid, gid); 71150974Swpaul if (error != 0) 71250974Swpaul goto out; 71350974Swpaul#endif 71450974Swpaul if (uid == (uid_t)-1) 71550974Swpaul uid = shmfd->shm_uid; 71650974Swpaul if (gid == (gid_t)-1) 71750974Swpaul gid = shmfd->shm_gid; 71850974Swpaul if (((uid != shmfd->shm_uid && uid != active_cred->cr_uid) || 71950974Swpaul (gid != shmfd->shm_gid && !groupmember(gid, active_cred))) && 72050974Swpaul (error = priv_check_cred(active_cred, PRIV_VFS_CHOWN, 0))) 72150974Swpaul goto out; 72250974Swpaul shmfd->shm_uid = uid; 72350974Swpaul shmfd->shm_gid = gid; 72472813Swpaulout: 72572813Swpaul mtx_unlock(&shm_timestamp_lock); 72672813Swpaul return (error); 72772813Swpaul} 72872813Swpaul