#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/time.h>
#include <sys/vnode.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

/*
 * One entry in the path -> shmfd dictionary.  The entry owns its path
 * string (freed in shm_remove()) and holds one reference on the shmfd
 * (taken in shm_insert(), dropped in shm_remove()).
 */
struct shm_mapping {
	char		*sm_path;	/* heap copy of the '/'-prefixed path */
	Fnv32_t		sm_fnv;		/* FNV-1 hash of sm_path */
	struct shmfd	*sm_shmfd;	/* referenced shm object */
	LIST_ENTRY(shm_mapping) sm_link;
};

static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor");
static LIST_HEAD(, shm_mapping) *shm_dictionary; /* buckets; shm_dict_lock */
static struct sx shm_dict_lock;		/* protects shm_dictionary */
static struct mtx shm_timestamp_lock;	/* protects shm_{a,c,m}time updates */
static u_long shm_hash;			/* bucket mask from hashinit() */

/* Map an FNV hash to its dictionary bucket. */
#define	SHM_HASH(fnv)	(&shm_dictionary[(fnv) & shm_hash])

static int	shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags);
static struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode);
static void	shm_dict_init(void *arg);
static void	shm_drop(struct shmfd *shmfd);
static struct shmfd *shm_hold(struct shmfd *shmfd);
static void	shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
static int	shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
static void	shm_dotruncate(struct shmfd *shmfd, off_t length);

static fo_rdwr_t	shm_read;
static fo_rdwr_t	shm_write;
static fo_truncate_t	shm_truncate;
static fo_ioctl_t	shm_ioctl;
static fo_poll_t	shm_poll;
static fo_kqfilter_t	shm_kqfilter;
static fo_stat_t	shm_stat;
static fo_close_t	shm_close;

/* File descriptor operations. */
static struct fileops shm_ops = {
	.fo_read = shm_read,
	.fo_write = shm_write,
	.fo_truncate = shm_truncate,
	.fo_ioctl = shm_ioctl,
	.fo_poll = shm_poll,
	.fo_kqfilter = shm_kqfilter,
	.fo_stat = shm_stat,
	.fo_close = shm_close,
	.fo_flags = DFLAG_PASSABLE
};

FEATURE(posix_shm, "POSIX shared memory");

/*
 * read(2) is not supported on shm descriptors; the object is accessed
 * through a mapping instead (see shm_mmap()).
 */
static int
shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}

/* write(2) is likewise unsupported. */
static int
shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}

/*
 * ftruncate(2) hook: resize the backing VM object after an optional
 * MAC veto.  shm_dotruncate() itself cannot fail.
 */
static int
shm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *shmfd;
#ifdef MAC
	int error;
#endif

	shmfd = fp->f_data;
#ifdef MAC
	error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif
	shm_dotruncate(shmfd, length);
	return (0);
}

static int
shm_ioctl(struct file *fp, u_long com, void *data,
    struct ucred *active_cred, struct thread *td)
{

	return (EOPNOTSUPP);
}

static int
shm_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}

static int
shm_kqfilter(struct file *fp, struct knote *kn)
{

	return (EOPNOTSUPP);
}

/*
 * fstat(2) hook: synthesize a struct stat from the shmfd's recorded
 * size, mode, ownership, and timestamps, since there is no backing
 * vnode.
 */
static int
shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *shmfd;
#ifdef MAC
	int error;
#endif

	shmfd = fp->f_data;

#ifdef MAC
	error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif

	/*
	 * Attempt to return sanish values for fstat() on a memory file
	 * descriptor.
	 */
	bzero(sb, sizeof(*sb));
	sb->st_mode = S_IFREG | shmfd->shm_mode;	/* XXX */
	sb->st_blksize = PAGE_SIZE;
	sb->st_size = shmfd->shm_size;
	sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
	sb->st_atimespec = shmfd->shm_atime;
	sb->st_ctimespec = shmfd->shm_ctime;
	sb->st_mtimespec = shmfd->shm_mtime;
	sb->st_birthtimespec = shmfd->shm_birthtime;
	sb->st_uid = shmfd->shm_uid;
	sb->st_gid = shmfd->shm_gid;

	return (0);
}

/*
 * Last close of this descriptor: drop its reference on the shmfd.
 * For named objects the dictionary may still hold another reference.
 */
static int
shm_close(struct file *fp, struct thread *td)
{
	struct shmfd *shmfd;

	shmfd = fp->f_data;
	fp->f_data = NULL;
	shm_drop(shmfd);

	return (0);
}

/*
 * Resize the VM object backing 'shmfd' to 'length' bytes.  Growing
 * only updates the recorded sizes (no pages are touched here);
 * shrinking discards resident pages and swap space beyond the new
 * end, and zeroes the tail of a partially-retained last page.  Also
 * updates shm_size, ctime and mtime under shm_timestamp_lock.
 */
static void
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t nobjsize;

	object = shmfd->shm_object;
	VM_OBJECT_LOCK(object);
	if (length == shmfd->shm_size) {
		/* No change; avoid spurious timestamp updates. */
		VM_OBJECT_UNLOCK(object);
		return;
	}
	/* New object size in pages, rounded up. */
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		/* Toss in memory pages. */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    FALSE);

		/* Toss pages from swap. */
		if (object->type == OBJT_SWAP)
			swap_pager_freespace(object, nobjsize,
			    object->size - nobjsize);

		/*
		 * If the last page is partially mapped, then zero out
		 * the garbage at the end of the page.  See comments
		 * in vnode_pager_setsize() for more details.
		 *
		 * XXXJHB: This handles in memory pages, but what about
		 * a page swapped out to disk?
		 */
		if ((length & PAGE_MASK) &&
		    (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
		    m->valid != 0) {
			int base = (int)length & PAGE_MASK;
			int size = PAGE_SIZE - base;

			pmap_zero_page_area(m, base, size);

			/*
			 * Update the valid bits to reflect the blocks that
			 * have been zeroed.  Some of these valid bits may
			 * have already been set.
			 */
			vm_page_set_valid(m, base, size);

			/*
			 * Round "base" to the next block boundary so that the
			 * dirty bit for a partially zeroed block is not
			 * cleared.
			 */
			base = roundup2(base, DEV_BSIZE);

			vm_page_lock_queues();
			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
			vm_page_unlock_queues();
		} else if ((length & PAGE_MASK) &&
		    __predict_false(object->cache != NULL)) {
			/* Free any cached pages past the new end. */
			vm_page_cache_free(object, OFF_TO_IDX(length),
			    nobjsize);
		}
	}
	shmfd->shm_size = length;
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
}

/*
 * shmfd object management including creation and reference counting
 * routines.
 */

/*
 * Allocate a new zero-length shmfd backed by an anonymous (default)
 * VM object, owned by 'ucred' with permission bits 'mode'.  All
 * timestamps start at the birth time.  The caller receives the sole
 * reference.
 */
static struct shmfd *
shm_alloc(struct ucred *ucred, mode_t mode)
{
	struct shmfd *shmfd;

	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
	shmfd->shm_size = 0;
	shmfd->shm_uid = ucred->cr_uid;
	shmfd->shm_gid = ucred->cr_gid;
	shmfd->shm_mode = mode;
	shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
	    shmfd->shm_size, VM_PROT_DEFAULT, 0);
	KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
	VM_OBJECT_LOCK(shmfd->shm_object);
	/* The object may be shared by many mappings and must not split. */
	vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shmfd->shm_object, OBJT_DEFAULT == OBJT_DEFAULT ?
	    OBJ_NOSPLIT : OBJ_NOSPLIT);
	VM_OBJECT_UNLOCK(shmfd->shm_object);
	vfs_timestamp(&shmfd->shm_birthtime);
	shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
	    shmfd->shm_birthtime;
	refcount_init(&shmfd->shm_refs, 1);
#ifdef MAC
	mac_posixshm_init(shmfd);
	mac_posixshm_create(ucred, shmfd);
#endif

	return (shmfd);
}

/* Acquire an additional reference; returns its argument for chaining. */
static struct shmfd *
shm_hold(struct shmfd *shmfd)
{

	refcount_acquire(&shmfd->shm_refs);
	return (shmfd);
}

/* Release one reference; the final release destroys the object. */
static void
shm_drop(struct shmfd *shmfd)
{

	if (refcount_release(&shmfd->shm_refs)) {
#ifdef MAC
		mac_posixshm_destroy(shmfd);
#endif
		vm_object_deallocate(shmfd->shm_object);
		free(shmfd, M_SHMFD);
	}
}

/*
 * Determine if the credentials have sufficient permissions for a
 * specified combination of FREAD and FWRITE.
 */
static int
shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags)
{
	accmode_t accmode;

	accmode = 0;
	if (flags & FREAD)
		accmode |= VREAD;
	if (flags & FWRITE)
		accmode |= VWRITE;
	return (vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid,
	    accmode, ucred, NULL));
}

/*
 * Dictionary management.  We maintain an in-kernel dictionary to map
 * paths to shmfd objects.  We use the FNV hash on the path to store
 * the mappings in a hash table.
 */
static void
shm_dict_init(void *arg)
{

	mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
	sx_init(&shm_dict_lock, "shm dictionary");
	shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
}
SYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL);

/*
 * Find the shmfd registered under 'path', or NULL.  Caller holds
 * shm_dict_lock; no new reference is taken.
 */
static struct shmfd *
shm_lookup(char *path, Fnv32_t fnv)
{
	struct shm_mapping *map;

	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
		/* Cheap hash compare first, full strcmp only on a match. */
		if (map->sm_fnv != fnv)
			continue;
		if (strcmp(map->sm_path, path) == 0)
			return (map->sm_shmfd);
	}

	return (NULL);
}

/*
 * Register 'shmfd' under 'path'.  Takes ownership of the path string
 * and a new reference on the shmfd.  Caller holds shm_dict_lock
 * exclusively.
 */
static void
shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd)
{
	struct shm_mapping *map;

	map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK);
	map->sm_path = path;
	map->sm_fnv = fnv;
	map->sm_shmfd = shm_hold(shmfd);
	LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link);
}

/*
 * Remove the dictionary entry for 'path' after MAC (if compiled in)
 * and read/write permission checks, dropping the dictionary's
 * reference and freeing the entry.  Returns ENOENT if no entry
 * matches.  Caller holds shm_dict_lock exclusively.
 */
static int
shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
{
	struct shm_mapping *map;
	int error;

	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
		if (map->sm_fnv != fnv)
			continue;
		if (strcmp(map->sm_path, path) == 0) {
#ifdef MAC
			error = mac_posixshm_check_unlink(ucred, map->sm_shmfd);
			if (error)
				return (error);
#endif
			error = shm_access(map->sm_shmfd, ucred,
			    FREAD | FWRITE);
			if (error)
				return (error);
			LIST_REMOVE(map, sm_link);
			shm_drop(map->sm_shmfd);
			free(map->sm_path, M_SHMFD);
			free(map, M_SHMFD);
			return (0);
		}
	}

	return (ENOENT);
}

/* System calls.
 */

/*
 * shm_open(2): open, and possibly create, a POSIX shared memory
 * object.  Only O_RDONLY/O_RDWR plus O_CREAT, O_EXCL and O_TRUNC are
 * accepted.  A path of SHM_ANON creates an anonymous object that is
 * never entered in the dictionary.  Named paths must begin with '/'.
 * The returned descriptor is marked close-on-exec.
 */
int
shm_open(struct thread *td, struct shm_open_args *uap)
{
	struct filedesc *fdp;
	struct shmfd *shmfd;
	struct file *fp;
	char *path;
	Fnv32_t fnv;
	mode_t cmode;
	int fd, error;

	if ((uap->flags & O_ACCMODE) != O_RDONLY &&
	    (uap->flags & O_ACCMODE) != O_RDWR)
		return (EINVAL);

	if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	/* Apply the process umask to the requested creation mode. */
	cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;

	error = falloc(td, &fp, &fd);
	if (error)
		return (error);

	/* A SHM_ANON path pointer creates an anonymous object. */
	if (uap->path == SHM_ANON) {
		/* A read-only anonymous object is pointless. */
		if ((uap->flags & O_ACCMODE) == O_RDONLY) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (EINVAL);
		}
		shmfd = shm_alloc(td->td_ucred, cmode);
	} else {
		path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
		error = copyinstr(uap->path, path, MAXPATHLEN, NULL);

		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			free(path, M_SHMFD);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		sx_xlock(&shm_dict_lock);
		shmfd = shm_lookup(path, fnv);
		if (shmfd == NULL) {
			/* Object does not yet exist, create it if requested. */
			if (uap->flags & O_CREAT) {
				shmfd = shm_alloc(td->td_ucred, cmode);
				/* shm_insert() takes ownership of 'path'. */
				shm_insert(path, fnv, shmfd);
			} else {
				free(path, M_SHMFD);
				error = ENOENT;
			}
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			free(path, M_SHMFD);
			if ((uap->flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
#ifdef MAC
				error = mac_posixshm_check_open(td->td_ucred,
				    shmfd);
				if (error == 0)
#endif
				error = shm_access(shmfd, td->td_ucred,
				    FFLAGS(uap->flags & O_ACCMODE));
			}

			/*
			 * Truncate the file back to zero length if
			 * O_TRUNC was specified and the object was
			 * opened with read/write.
			 */
			if (error == 0 &&
			    (uap->flags & (O_ACCMODE | O_TRUNC)) ==
			    (O_RDWR | O_TRUNC)) {
#ifdef MAC
				error = mac_posixshm_check_truncate(
				    td->td_ucred, fp->f_cred, shmfd);
				if (error == 0)
#endif
				shm_dotruncate(shmfd, 0);
			}
			if (error == 0)
				shm_hold(shmfd);
		}
		sx_xunlock(&shm_dict_lock);

		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (error);
		}
	}

	finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);

	/* Mark the new descriptor close-on-exec. */
	FILEDESC_XLOCK(fdp);
	if (fdp->fd_ofiles[fd] == fp)
		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
	FILEDESC_XUNLOCK(fdp);
	td->td_retval[0] = fd;
	fdrop(fp, td);

	return (0);
}

/*
 * shm_unlink(2): remove a path -> shmfd mapping from the dictionary.
 * The object itself lives on until the last descriptor reference is
 * dropped.
 */
int
shm_unlink(struct thread *td, struct shm_unlink_args *uap)
{
	char *path;
	Fnv32_t fnv;
	int error;

	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error) {
		free(path, M_TEMP);
		return (error);
	}

	fnv = fnv_32_str(path, FNV1_32_INIT);
	sx_xlock(&shm_dict_lock);
	error = shm_remove(path, fnv, td->td_ucred);
	sx_xunlock(&shm_dict_lock);
	free(path, M_TEMP);

	return (error);
}

/*
 * mmap() helper to validate mmap() requests against shm object state
 * and give mmap() the vm_object to use for the mapping.
 */
int
shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
    vm_object_t *obj)
{

	/*
	 * XXXRW: This validation is probably insufficient, and subject to
	 * sign errors.  It should be fixed.
	 */
	if (foff >= shmfd->shm_size ||
	    foff + objsize > round_page(shmfd->shm_size))
		return (EINVAL);

	/* Record the access and hand out a referenced object. */
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_atime);
	mtx_unlock(&shm_timestamp_lock);
	vm_object_reference(shmfd->shm_object);
	*obj = shmfd->shm_object;
	return (0);
}
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/time.h>
#include <sys/vnode.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

/*
 * One entry in the path -> shmfd dictionary.  The entry owns its path
 * string (freed in shm_remove()) and holds one reference on the shmfd
 * (taken in shm_insert(), dropped in shm_remove()).
 */
struct shm_mapping {
	char		*sm_path;	/* heap copy of the '/'-prefixed path */
	Fnv32_t		sm_fnv;		/* FNV-1 hash of sm_path */
	struct shmfd	*sm_shmfd;	/* referenced shm object */
	LIST_ENTRY(shm_mapping) sm_link;
};

static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor");
static LIST_HEAD(, shm_mapping) *shm_dictionary; /* buckets; shm_dict_lock */
static struct sx shm_dict_lock;		/* protects shm_dictionary */
static struct mtx shm_timestamp_lock;	/* protects shm_{a,c,m}time updates */
static u_long shm_hash;			/* bucket mask from hashinit() */

/* Map an FNV hash to its dictionary bucket. */
#define	SHM_HASH(fnv)	(&shm_dictionary[(fnv) & shm_hash])

static int	shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags);
static struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode);
static void	shm_dict_init(void *arg);
static void	shm_drop(struct shmfd *shmfd);
static struct shmfd *shm_hold(struct shmfd *shmfd);
static void	shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
static int	shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
static void	shm_dotruncate(struct shmfd *shmfd, off_t length);

static fo_rdwr_t	shm_read;
static fo_rdwr_t	shm_write;
static fo_truncate_t	shm_truncate;
static fo_ioctl_t	shm_ioctl;
static fo_poll_t	shm_poll;
static fo_kqfilter_t	shm_kqfilter;
static fo_stat_t	shm_stat;
static fo_close_t	shm_close;

/* File descriptor operations. */
static struct fileops shm_ops = {
	.fo_read = shm_read,
	.fo_write = shm_write,
	.fo_truncate = shm_truncate,
	.fo_ioctl = shm_ioctl,
	.fo_poll = shm_poll,
	.fo_kqfilter = shm_kqfilter,
	.fo_stat = shm_stat,
	.fo_close = shm_close,
	.fo_flags = DFLAG_PASSABLE
};

FEATURE(posix_shm, "POSIX shared memory");

/*
 * read(2) is not supported on shm descriptors; the object is accessed
 * through a mapping instead (see shm_mmap()).
 */
static int
shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}

/* write(2) is likewise unsupported. */
static int
shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}

/*
 * ftruncate(2) hook: resize the backing VM object after an optional
 * MAC veto.  shm_dotruncate() itself cannot fail.
 */
static int
shm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *shmfd;
#ifdef MAC
	int error;
#endif

	shmfd = fp->f_data;
#ifdef MAC
	error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif
	shm_dotruncate(shmfd, length);
	return (0);
}

static int
shm_ioctl(struct file *fp, u_long com, void *data,
    struct ucred *active_cred, struct thread *td)
{

	return (EOPNOTSUPP);
}

static int
shm_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}

static int
shm_kqfilter(struct file *fp, struct knote *kn)
{

	return (EOPNOTSUPP);
}

/*
 * fstat(2) hook: synthesize a struct stat from the shmfd's recorded
 * size, mode, ownership, and timestamps, since there is no backing
 * vnode.
 */
static int
shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *shmfd;
#ifdef MAC
	int error;
#endif

	shmfd = fp->f_data;

#ifdef MAC
	error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd);
	if (error)
		return (error);
#endif

	/*
	 * Attempt to return sanish values for fstat() on a memory file
	 * descriptor.
	 */
	bzero(sb, sizeof(*sb));
	sb->st_mode = S_IFREG | shmfd->shm_mode;	/* XXX */
	sb->st_blksize = PAGE_SIZE;
	sb->st_size = shmfd->shm_size;
	sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
	sb->st_atimespec = shmfd->shm_atime;
	sb->st_ctimespec = shmfd->shm_ctime;
	sb->st_mtimespec = shmfd->shm_mtime;
	sb->st_birthtimespec = shmfd->shm_birthtime;
	sb->st_uid = shmfd->shm_uid;
	sb->st_gid = shmfd->shm_gid;

	return (0);
}

/*
 * Last close of this descriptor: drop its reference on the shmfd.
 * For named objects the dictionary may still hold another reference.
 */
static int
shm_close(struct file *fp, struct thread *td)
{
	struct shmfd *shmfd;

	shmfd = fp->f_data;
	fp->f_data = NULL;
	shm_drop(shmfd);

	return (0);
}

/*
 * Resize the VM object backing 'shmfd' to 'length' bytes.  Growing
 * only updates the recorded sizes (no pages are touched here);
 * shrinking discards resident pages and swap space beyond the new
 * end, and zeroes the tail of a partially-retained last page.  Also
 * updates shm_size, ctime and mtime under shm_timestamp_lock.
 */
static void
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
	vm_object_t object;
	vm_page_t m;
	vm_pindex_t nobjsize;

	object = shmfd->shm_object;
	VM_OBJECT_LOCK(object);
	if (length == shmfd->shm_size) {
		/* No change; avoid spurious timestamp updates. */
		VM_OBJECT_UNLOCK(object);
		return;
	}
	/* New object size in pages, rounded up. */
	nobjsize = OFF_TO_IDX(length + PAGE_MASK);

	/* Are we shrinking?  If so, trim the end. */
	if (length < shmfd->shm_size) {
		/* Toss in memory pages. */
		if (nobjsize < object->size)
			vm_object_page_remove(object, nobjsize, object->size,
			    FALSE);

		/* Toss pages from swap. */
		if (object->type == OBJT_SWAP)
			swap_pager_freespace(object, nobjsize,
			    object->size - nobjsize);

		/*
		 * If the last page is partially mapped, then zero out
		 * the garbage at the end of the page.  See comments
		 * in vnode_pager_setsize() for more details.
		 *
		 * XXXJHB: This handles in memory pages, but what about
		 * a page swapped out to disk?
		 */
		if ((length & PAGE_MASK) &&
		    (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
		    m->valid != 0) {
			int base = (int)length & PAGE_MASK;
			int size = PAGE_SIZE - base;

			pmap_zero_page_area(m, base, size);

			/*
			 * Update the valid bits to reflect the blocks that
			 * have been zeroed.  Some of these valid bits may
			 * have already been set.
			 */
			vm_page_set_valid(m, base, size);

			/*
			 * Round "base" to the next block boundary so that the
			 * dirty bit for a partially zeroed block is not
			 * cleared.
			 */
			base = roundup2(base, DEV_BSIZE);

			vm_page_lock_queues();
			vm_page_clear_dirty(m, base, PAGE_SIZE - base);
			vm_page_unlock_queues();
		} else if ((length & PAGE_MASK) &&
		    __predict_false(object->cache != NULL)) {
			/* Free any cached pages past the new end. */
			vm_page_cache_free(object, OFF_TO_IDX(length),
			    nobjsize);
		}
	}
	shmfd->shm_size = length;
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_ctime);
	shmfd->shm_mtime = shmfd->shm_ctime;
	mtx_unlock(&shm_timestamp_lock);
	object->size = nobjsize;
	VM_OBJECT_UNLOCK(object);
}

/*
 * shmfd object management including creation and reference counting
 * routines.
 */

/*
 * Allocate a new zero-length shmfd backed by an anonymous (default)
 * VM object, owned by 'ucred' with permission bits 'mode'.  All
 * timestamps start at the birth time.  The caller receives the sole
 * reference.
 */
static struct shmfd *
shm_alloc(struct ucred *ucred, mode_t mode)
{
	struct shmfd *shmfd;

	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
	shmfd->shm_size = 0;
	shmfd->shm_uid = ucred->cr_uid;
	shmfd->shm_gid = ucred->cr_gid;
	shmfd->shm_mode = mode;
	shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
	    shmfd->shm_size, VM_PROT_DEFAULT, 0);
	KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
	VM_OBJECT_LOCK(shmfd->shm_object);
	/* The object may be shared by many mappings and must not split. */
	vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING);
	vm_object_set_flag(shmfd->shm_object, OBJ_NOSPLIT);
	VM_OBJECT_UNLOCK(shmfd->shm_object);
	vfs_timestamp(&shmfd->shm_birthtime);
	shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
	    shmfd->shm_birthtime;
	refcount_init(&shmfd->shm_refs, 1);
#ifdef MAC
	mac_posixshm_init(shmfd);
	mac_posixshm_create(ucred, shmfd);
#endif

	return (shmfd);
}

/* Acquire an additional reference; returns its argument for chaining. */
static struct shmfd *
shm_hold(struct shmfd *shmfd)
{

	refcount_acquire(&shmfd->shm_refs);
	return (shmfd);
}

/* Release one reference; the final release destroys the object. */
static void
shm_drop(struct shmfd *shmfd)
{

	if (refcount_release(&shmfd->shm_refs)) {
#ifdef MAC
		mac_posixshm_destroy(shmfd);
#endif
		vm_object_deallocate(shmfd->shm_object);
		free(shmfd, M_SHMFD);
	}
}

/*
 * Determine if the credentials have sufficient permissions for a
 * specified combination of FREAD and FWRITE.
 */
static int
shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags)
{
	accmode_t accmode;

	accmode = 0;
	if (flags & FREAD)
		accmode |= VREAD;
	if (flags & FWRITE)
		accmode |= VWRITE;
	return (vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid,
	    accmode, ucred, NULL));
}

/*
 * Dictionary management.  We maintain an in-kernel dictionary to map
 * paths to shmfd objects.  We use the FNV hash on the path to store
 * the mappings in a hash table.
 */
static void
shm_dict_init(void *arg)
{

	mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
	sx_init(&shm_dict_lock, "shm dictionary");
	shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
}
SYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL);

/*
 * Find the shmfd registered under 'path', or NULL.  Caller holds
 * shm_dict_lock; no new reference is taken.
 */
static struct shmfd *
shm_lookup(char *path, Fnv32_t fnv)
{
	struct shm_mapping *map;

	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
		/* Cheap hash compare first, full strcmp only on a match. */
		if (map->sm_fnv != fnv)
			continue;
		if (strcmp(map->sm_path, path) == 0)
			return (map->sm_shmfd);
	}

	return (NULL);
}

/*
 * Register 'shmfd' under 'path'.  Takes ownership of the path string
 * and a new reference on the shmfd.  Caller holds shm_dict_lock
 * exclusively.
 */
static void
shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd)
{
	struct shm_mapping *map;

	map = malloc(sizeof(struct shm_mapping), M_SHMFD, M_WAITOK);
	map->sm_path = path;
	map->sm_fnv = fnv;
	map->sm_shmfd = shm_hold(shmfd);
	LIST_INSERT_HEAD(SHM_HASH(fnv), map, sm_link);
}

/*
 * Remove the dictionary entry for 'path' after MAC (if compiled in)
 * and read/write permission checks, dropping the dictionary's
 * reference and freeing the entry.  Returns ENOENT if no entry
 * matches.  Caller holds shm_dict_lock exclusively.
 */
static int
shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
{
	struct shm_mapping *map;
	int error;

	LIST_FOREACH(map, SHM_HASH(fnv), sm_link) {
		if (map->sm_fnv != fnv)
			continue;
		if (strcmp(map->sm_path, path) == 0) {
#ifdef MAC
			error = mac_posixshm_check_unlink(ucred, map->sm_shmfd);
			if (error)
				return (error);
#endif
			error = shm_access(map->sm_shmfd, ucred,
			    FREAD | FWRITE);
			if (error)
				return (error);
			LIST_REMOVE(map, sm_link);
			shm_drop(map->sm_shmfd);
			free(map->sm_path, M_SHMFD);
			free(map, M_SHMFD);
			return (0);
		}
	}

	return (ENOENT);
}

/* System calls.
 */

/*
 * shm_open(2): open, and possibly create, a POSIX shared memory
 * object.  Only O_RDONLY/O_RDWR plus O_CREAT, O_EXCL and O_TRUNC are
 * accepted.  A path of SHM_ANON creates an anonymous object that is
 * never entered in the dictionary.  Named paths must begin with '/'.
 * The returned descriptor is marked close-on-exec.
 */
int
shm_open(struct thread *td, struct shm_open_args *uap)
{
	struct filedesc *fdp;
	struct shmfd *shmfd;
	struct file *fp;
	char *path;
	Fnv32_t fnv;
	mode_t cmode;
	int fd, error;

	if ((uap->flags & O_ACCMODE) != O_RDONLY &&
	    (uap->flags & O_ACCMODE) != O_RDWR)
		return (EINVAL);

	if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0)
		return (EINVAL);

	fdp = td->td_proc->p_fd;
	/* Apply the process umask to the requested creation mode. */
	cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;

	error = falloc(td, &fp, &fd);
	if (error)
		return (error);

	/* A SHM_ANON path pointer creates an anonymous object. */
	if (uap->path == SHM_ANON) {
		/* A read-only anonymous object is pointless. */
		if ((uap->flags & O_ACCMODE) == O_RDONLY) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (EINVAL);
		}
		shmfd = shm_alloc(td->td_ucred, cmode);
	} else {
		path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
		error = copyinstr(uap->path, path, MAXPATHLEN, NULL);

		/* Require paths to start with a '/' character. */
		if (error == 0 && path[0] != '/')
			error = EINVAL;
		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			free(path, M_SHMFD);
			return (error);
		}

		fnv = fnv_32_str(path, FNV1_32_INIT);
		sx_xlock(&shm_dict_lock);
		shmfd = shm_lookup(path, fnv);
		if (shmfd == NULL) {
			/* Object does not yet exist, create it if requested. */
			if (uap->flags & O_CREAT) {
				shmfd = shm_alloc(td->td_ucred, cmode);
				/* shm_insert() takes ownership of 'path'. */
				shm_insert(path, fnv, shmfd);
			} else {
				free(path, M_SHMFD);
				error = ENOENT;
			}
		} else {
			/*
			 * Object already exists, obtain a new
			 * reference if requested and permitted.
			 */
			free(path, M_SHMFD);
			if ((uap->flags & (O_CREAT | O_EXCL)) ==
			    (O_CREAT | O_EXCL))
				error = EEXIST;
			else {
#ifdef MAC
				error = mac_posixshm_check_open(td->td_ucred,
				    shmfd);
				if (error == 0)
#endif
				error = shm_access(shmfd, td->td_ucred,
				    FFLAGS(uap->flags & O_ACCMODE));
			}

			/*
			 * Truncate the file back to zero length if
			 * O_TRUNC was specified and the object was
			 * opened with read/write.
			 */
			if (error == 0 &&
			    (uap->flags & (O_ACCMODE | O_TRUNC)) ==
			    (O_RDWR | O_TRUNC)) {
#ifdef MAC
				error = mac_posixshm_check_truncate(
				    td->td_ucred, fp->f_cred, shmfd);
				if (error == 0)
#endif
				shm_dotruncate(shmfd, 0);
			}
			if (error == 0)
				shm_hold(shmfd);
		}
		sx_xunlock(&shm_dict_lock);

		if (error) {
			fdclose(fdp, fp, fd, td);
			fdrop(fp, td);
			return (error);
		}
	}

	finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);

	/* Mark the new descriptor close-on-exec. */
	FILEDESC_XLOCK(fdp);
	if (fdp->fd_ofiles[fd] == fp)
		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
	FILEDESC_XUNLOCK(fdp);
	td->td_retval[0] = fd;
	fdrop(fp, td);

	return (0);
}

/*
 * shm_unlink(2): remove a path -> shmfd mapping from the dictionary.
 * The object itself lives on until the last descriptor reference is
 * dropped.
 */
int
shm_unlink(struct thread *td, struct shm_unlink_args *uap)
{
	char *path;
	Fnv32_t fnv;
	int error;

	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error) {
		free(path, M_TEMP);
		return (error);
	}

	fnv = fnv_32_str(path, FNV1_32_INIT);
	sx_xlock(&shm_dict_lock);
	error = shm_remove(path, fnv, td->td_ucred);
	sx_xunlock(&shm_dict_lock);
	free(path, M_TEMP);

	return (error);
}

/*
 * mmap() helper to validate mmap() requests against shm object state
 * and give mmap() the vm_object to use for the mapping.
 */
int
shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
    vm_object_t *obj)
{

	/*
	 * XXXRW: This validation is probably insufficient, and subject to
	 * sign errors.  It should be fixed.
	 */
	if (foff >= shmfd->shm_size ||
	    foff + objsize > round_page(shmfd->shm_size))
		return (EINVAL);

	/* Record the access and hand out a referenced object. */
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_atime);
	mtx_unlock(&shm_timestamp_lock);
	vm_object_reference(shmfd->shm_object);
	*obj = shmfd->shm_object;
	return (0);
}