/*	$NetBSD: sysv_shm.c,v 1.105 2008/01/30 21:09:41 njoly Exp $	*/

/*-
 * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1994 Adam Glass and Charles M. Hannum.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Adam Glass and Charles M.
 *	Hannum.
 * 4. The names of the authors may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.105 2008/01/30 21:09:41 njoly Exp $");

#define SYSVSHM

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/shm.h>
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/mount.h>		/* XXX for <sys/syscallargs.h> */
#include <sys/syscallargs.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>

int shm_nused;
struct shmid_ds *shmsegs;

struct shmmap_entry {
	SLIST_ENTRY(shmmap_entry) next;
	vaddr_t va;
	int shmid;
};

static kmutex_t		shm_lock;
static kcondvar_t *	shm_cv;
static struct pool	shmmap_entry_pool;
static int		shm_last_free, shm_committed, shm_use_phys;

static kcondvar_t	shm_realloc_cv;
static bool		shm_realloc_state;
static u_int		shm_realloc_disable;

struct shmmap_state {
	unsigned int nitems;
	unsigned int nrefs;
	SLIST_HEAD(, shmmap_entry) entries;
};

#ifdef SHMDEBUG
#define SHMPRINTF(a) printf a
#else
#define SHMPRINTF(a)
#endif

static int shmrealloc(int);

/*
 * Find the shared memory segment by the identifier.
 *  => must be called with shm_lock held;
 */
static struct shmid_ds *
shm_find_segment_by_shmid(int shmid)
{
	int segnum;
	struct shmid_ds *shmseg;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmid);
	if (segnum < 0 || segnum >= shminfo.shmmni)
		return NULL;
	shmseg = &shmsegs[segnum];
	if ((shmseg->shm_perm.mode & SHMSEG_ALLOCATED) == 0)
		return NULL;
	if ((shmseg->shm_perm.mode &
	    (SHMSEG_REMOVED|SHMSEG_RMLINGER)) == SHMSEG_REMOVED)
		return NULL;
	if (shmseg->shm_perm._seq != IPCID_TO_SEQ(shmid))
		return NULL;

	return shmseg;
}

/*
 * Free memory segment.
 *  => must be called with shm_lock held;
 */
static void
shm_free_segment(int segnum)
{
	struct shmid_ds *shmseg;
	size_t size;
	bool wanted;

	KASSERT(mutex_owned(&shm_lock));

	shmseg = &shmsegs[segnum];
	SHMPRINTF(("shm freeing key 0x%lx seq 0x%x\n",
	    shmseg->shm_perm._key, shmseg->shm_perm._seq));

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	wanted = (shmseg->shm_perm.mode & SHMSEG_WANTED);

	shmseg->_shm_internal = NULL;
	shm_committed -= btoc(size);
	shm_nused--;
	shmseg->shm_perm.mode = SHMSEG_FREE;
	shm_last_free = segnum;
	if (wanted == true)
		cv_broadcast(&shm_cv[segnum]);
}
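/*
 * Overview of the segment life cycle, summarized from the code in this
 * file (a descriptive note, not normative): a slot goes from SHMSEG_FREE
 * to SHMSEG_ALLOCATED|SHMSEG_REMOVED while sys_shmget() drops shm_lock
 * to allocate the backing uvm object, then to SHMSEG_ALLOCATED once
 * initialized.  IPC_RMID sets SHMSEG_REMOVED again, and the slot is
 * reclaimed by shm_free_segment() once shm_nattch drops to zero.
 */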
/*
 * Delete entry from the shm map.
 *  => must be called with shm_lock held;
 */
static struct uvm_object *
shm_delete_mapping(struct shmmap_state *shmmap_s,
    struct shmmap_entry *shmmap_se)
{
	struct uvm_object *uobj = NULL;
	struct shmid_ds *shmseg;
	int segnum;

	KASSERT(mutex_owned(&shm_lock));

	segnum = IPCID_TO_IX(shmmap_se->shmid);
	shmseg = &shmsegs[segnum];
	SLIST_REMOVE(&shmmap_s->entries, shmmap_se, shmmap_entry, next);
	shmmap_s->nitems--;
	shmseg->shm_dtime = time_second;
	if ((--shmseg->shm_nattch <= 0) &&
	    (shmseg->shm_perm.mode & SHMSEG_REMOVED)) {
		uobj = shmseg->_shm_internal;
		shm_free_segment(segnum);
	}

	return uobj;
}

/*
 * Get a non-shared shm map for that vmspace.  Note that memory
 * allocation might be performed with the lock held.
 */
static struct shmmap_state *
shmmap_getprivate(struct proc *p)
{
	struct shmmap_state *oshmmap_s, *shmmap_s;
	struct shmmap_entry *oshmmap_se, *shmmap_se;

	KASSERT(mutex_owned(&shm_lock));

	/* 1. A shm map with refcnt = 1, used by ourselves, thus return */
	oshmmap_s = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (oshmmap_s && oshmmap_s->nrefs == 1)
		return oshmmap_s;

	/* 2. No shm map present - create a fresh one */
	shmmap_s = kmem_zalloc(sizeof(struct shmmap_state), KM_SLEEP);
	shmmap_s->nrefs = 1;
	SLIST_INIT(&shmmap_s->entries);
	p->p_vmspace->vm_shm = (void *)shmmap_s;

	if (oshmmap_s == NULL)
		return shmmap_s;

	SHMPRINTF(("shmmap_getprivate: vm %p split (%d entries), was used by %d\n",
	    p->p_vmspace, oshmmap_s->nitems, oshmmap_s->nrefs));

	/* 3. A shared shm map, copy to a fresh one and adjust refcounts */
	SLIST_FOREACH(oshmmap_se, &oshmmap_s->entries, next) {
		shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK);
		shmmap_se->va = oshmmap_se->va;
		shmmap_se->shmid = oshmmap_se->shmid;
		SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	}
	shmmap_s->nitems = oshmmap_s->nitems;
	oshmmap_s->nrefs--;

	return shmmap_s;
}
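/*
 * Note (added for clarity): shmfork() below shares the parent's shm map
 * by reference (nrefs++) instead of copying it; shmmap_getprivate()
 * above performs the actual copy lazily, the first time a sharing
 * process needs to modify its map.
 */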
/*
 * Lock/unlock the memory.
 *  => must be called with shm_lock held;
 *  => called from one place, thus, inline;
 */
static inline int
shm_memlock(struct lwp *l, struct shmid_ds *shmseg, int shmid, int cmd)
{
	struct proc *p = l->l_proc;
	struct shmmap_entry *shmmap_se;
	struct shmmap_state *shmmap_s;
	size_t size;
	int error;

	KASSERT(mutex_owned(&shm_lock));
	shmmap_s = shmmap_getprivate(p);

	/* Find our shared memory address by shmid */
	SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next) {
		if (shmmap_se->shmid != shmid)
			continue;

		size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;

		if (cmd == SHM_LOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) == 0) {
			/* Wire the object and map, then tag it */
			error = uobj_wirepages(shmseg->_shm_internal, 0,
			    round_page(shmseg->shm_segsz));
			if (error)
				return EIO;
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, false, 0);
			if (error) {
				uobj_unwirepages(shmseg->_shm_internal, 0,
				    round_page(shmseg->shm_segsz));
				if (error == EFAULT)
					error = ENOMEM;
				return error;
			}
			shmseg->shm_perm.mode |= SHMSEG_WIRED;

		} else if (cmd == SHM_UNLOCK &&
		    (shmseg->shm_perm.mode & SHMSEG_WIRED) != 0) {
			/* Unwire the object and map, then untag it */
			uobj_unwirepages(shmseg->_shm_internal, 0,
			    round_page(shmseg->shm_segsz));
			error = uvm_map_pageable(&p->p_vmspace->vm_map,
			    shmmap_se->va, shmmap_se->va + size, true, 0);
			if (error)
				return EIO;
			shmseg->shm_perm.mode &= ~SHMSEG_WIRED;
		}
	}

	return 0;
}

/*
 * Unmap shared memory.
 */
int
sys_shmdt(struct lwp *l, const struct sys_shmdt_args *uap, register_t *retval)
{
	/* {
		syscallarg(const void *) shmaddr;
	} */
	struct proc *p = l->l_proc;
	struct shmmap_state *shmmap_s1, *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct shmid_ds *shmseg;
	size_t size;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmmap_s1 = (struct shmmap_state *)p->p_vmspace->vm_shm;
	if (shmmap_s1 == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	/* Find the map entry */
	SLIST_FOREACH(shmmap_se, &shmmap_s1->entries, next)
		if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
			break;
	if (shmmap_se == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	shmmap_s = shmmap_getprivate(p);
	if (shmmap_s != shmmap_s1) {
		/* Map has been copied, lookup entry in new map */
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			if (shmmap_se->va == (vaddr_t)SCARG(uap, shmaddr))
				break;
		if (shmmap_se == NULL) {
			mutex_exit(&shm_lock);
			return EINVAL;
		}
	}

	SHMPRINTF(("shmdt: vm %p: remove %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, shmmap_se->va));

	/* Delete the entry from shm map */
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	mutex_exit(&shm_lock);

	uvm_deallocate(&p->p_vmspace->vm_map, shmmap_se->va, size);
	if (uobj != NULL)
		uao_detach(uobj);
	pool_put(&shmmap_entry_pool, shmmap_se);

	return 0;
}
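/*
 * For reference, a minimal userland sketch of the detach/attach pair
 * implemented by sys_shmdt() above and sys_shmat() below (illustrative
 * only, not part of this file):
 *
 *	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);
 *	char *p = shmat(id, NULL, 0);		// -> sys_shmat()
 *	p[0] = 1;				// visible to all attachers
 *	shmdt(p);				// -> sys_shmdt()
 *	shmctl(id, IPC_RMID, NULL);		// free the segment
 */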
/*
 * Map shared memory.
 */
int
sys_shmat(struct lwp *l, const struct sys_shmat_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(const void *) shmaddr;
		syscallarg(int) shmflg;
	} */
	int error, flags = 0;
	struct proc *p = l->l_proc;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object *uobj;
	struct vmspace *vm;
	vaddr_t attach_va;
	vm_prot_t prot;
	vsize_t size;

	/* Allocate a new map entry and set it */
	shmmap_se = pool_get(&shmmap_entry_pool, PR_WAITOK);

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(SCARG(uap, shmid));
	if (shmseg == NULL) {
		error = EINVAL;
		goto err;
	}
	error = ipcperm(cred, &shmseg->shm_perm,
	    (SCARG(uap, shmflg) & SHM_RDONLY) ? IPC_R : IPC_R|IPC_W);
	if (error)
		goto err;

	vm = p->p_vmspace;
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s && shmmap_s->nitems >= shminfo.shmseg) {
		error = EMFILE;
		goto err;
	}

	size = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
	prot = VM_PROT_READ;
	if ((SCARG(uap, shmflg) & SHM_RDONLY) == 0)
		prot |= VM_PROT_WRITE;
	if (SCARG(uap, shmaddr)) {
		flags |= UVM_FLAG_FIXED;
		if (SCARG(uap, shmflg) & SHM_RND)
			attach_va =
			    (vaddr_t)SCARG(uap, shmaddr) & ~(SHMLBA-1);
		else if (((vaddr_t)SCARG(uap, shmaddr) & (SHMLBA-1)) == 0)
			attach_va = (vaddr_t)SCARG(uap, shmaddr);
		else {
			error = EINVAL;
			goto err;
		}
	} else {
		/* This is just a hint to uvm_mmap() about where to put it. */
		attach_va = p->p_emul->e_vm_default_addr(p,
		    (vaddr_t)vm->vm_daddr, size);
	}

	/*
	 * Create a map entry, add it to the list and increase the counters.
	 * The lock will be dropped before the mapping, disable reallocation.
	 */
	shmmap_s = shmmap_getprivate(p);
	SLIST_INSERT_HEAD(&shmmap_s->entries, shmmap_se, next);
	shmmap_s->nitems++;
	shmseg->shm_lpid = p->p_pid;
	shmseg->shm_nattch++;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/*
	 * Add a reference to the memory object, map it to the
	 * address space, and lock the memory, if needed.
	 */
	uobj = shmseg->_shm_internal;
	uao_reference(uobj);
	error = uvm_map(&vm->vm_map, &attach_va, size, uobj, 0, 0,
	    UVM_MAPFLAG(prot, prot, UVM_INH_SHARE, UVM_ADV_RANDOM, flags));
	if (error)
		goto err_detach;
	if (shm_use_phys || (shmseg->shm_perm.mode & SHMSEG_WIRED)) {
		error = uvm_map_pageable(&vm->vm_map, attach_va,
		    attach_va + size, false, 0);
		if (error) {
			if (error == EFAULT)
				error = ENOMEM;
			uvm_deallocate(&vm->vm_map, attach_va, size);
			goto err_detach;
		}
	}

	/* Set the new address, and update the time */
	mutex_enter(&shm_lock);
	shmmap_se->va = attach_va;
	shmmap_se->shmid = SCARG(uap, shmid);
	shmseg->shm_atime = time_second;
	shm_realloc_disable--;
	retval[0] = attach_va;
	SHMPRINTF(("shmat: vm %p: add %d @%lx\n",
	    p->p_vmspace, shmmap_se->shmid, attach_va));
err:
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (error && shmmap_se)
		pool_put(&shmmap_entry_pool, shmmap_se);
	return error;

err_detach:
	uao_detach(uobj);
	mutex_enter(&shm_lock);
	uobj = shm_delete_mapping(shmmap_s, shmmap_se);
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	pool_put(&shmmap_entry_pool, shmmap_se);
	return error;
}
/*
 * Shared memory control operations.
 */
int
sys___shmctl13(struct lwp *l, const struct sys___shmctl13_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) shmid;
		syscallarg(int) cmd;
		syscallarg(struct shmid_ds *) buf;
	} */
	struct shmid_ds shmbuf;
	int cmd, error;

	cmd = SCARG(uap, cmd);
	if (cmd == IPC_SET) {
		error = copyin(SCARG(uap, buf), &shmbuf, sizeof(shmbuf));
		if (error)
			return error;
	}

	error = shmctl1(l, SCARG(uap, shmid), cmd,
	    (cmd == IPC_SET || cmd == IPC_STAT) ? &shmbuf : NULL);

	if (error == 0 && cmd == IPC_STAT)
		error = copyout(&shmbuf, SCARG(uap, buf), sizeof(shmbuf));

	return error;
}

int
shmctl1(struct lwp *l, int shmid, int cmd, struct shmid_ds *shmbuf)
{
	struct uvm_object *uobj = NULL;
	kauth_cred_t cred = l->l_cred;
	struct shmid_ds *shmseg;
	int error = 0;

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	shmseg = shm_find_segment_by_shmid(shmid);
	if (shmseg == NULL) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}

	switch (cmd) {
	case IPC_STAT:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_R)) != 0)
			break;
		memcpy(shmbuf, shmseg, sizeof(struct shmid_ds));
		break;
	case IPC_SET:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm.uid = shmbuf->shm_perm.uid;
		shmseg->shm_perm.gid = shmbuf->shm_perm.gid;
		shmseg->shm_perm.mode =
		    (shmseg->shm_perm.mode & ~ACCESSPERMS) |
		    (shmbuf->shm_perm.mode & ACCESSPERMS);
		shmseg->shm_ctime = time_second;
		break;
	case IPC_RMID:
		if ((error = ipcperm(cred, &shmseg->shm_perm, IPC_M)) != 0)
			break;
		shmseg->shm_perm._key = IPC_PRIVATE;
		shmseg->shm_perm.mode |= SHMSEG_REMOVED;
		if (shmseg->shm_nattch <= 0) {
			uobj = shmseg->_shm_internal;
			shm_free_segment(IPCID_TO_IX(shmid));
		}
		break;
	case SHM_LOCK:
	case SHM_UNLOCK:
		if ((error = kauth_authorize_generic(cred,
		    KAUTH_GENERIC_ISSUSER, NULL)) != 0)
			break;
		error = shm_memlock(l, shmseg, shmid, cmd);
		break;
	default:
		error = EINVAL;
	}

	mutex_exit(&shm_lock);
	if (uobj != NULL)
		uao_detach(uobj);
	return error;
}
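/*
 * Illustrative shmctl(2) use of the operations handled by shmctl1()
 * above (a userland sketch, not part of this file):
 *
 *	struct shmid_ds ds;
 *	shmctl(id, IPC_STAT, &ds);	// copy out the kernel shmid_ds
 *	ds.shm_perm.mode = 0600;
 *	shmctl(id, IPC_SET, &ds);	// only uid/gid/mode are taken
 *	shmctl(id, IPC_RMID, NULL);	// mark removed; freed at nattch 0
 */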
/*
 * Try to take an already existing segment.
 *  => must be called with shm_lock held;
 *  => called from one place, thus, inline;
 */
static inline int
shmget_existing(struct lwp *l, const struct sys_shmget_args *uap, int mode,
    register_t *retval)
{
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	int segnum, error;
again:
	KASSERT(mutex_owned(&shm_lock));

	/* Find segment by key */
	for (segnum = 0; segnum < shminfo.shmmni; segnum++)
		if ((shmsegs[segnum].shm_perm.mode & SHMSEG_ALLOCATED) &&
		    shmsegs[segnum].shm_perm._key == SCARG(uap, key))
			break;
	if (segnum == shminfo.shmmni) {
		/* Not found */
		return -1;
	}

	shmseg = &shmsegs[segnum];
	if (shmseg->shm_perm.mode & SHMSEG_REMOVED) {
		/*
		 * This segment is in the process of being allocated.  Wait
		 * until it's done, and look the key up again (in case the
		 * allocation failed or it was freed).
		 */
		shmseg->shm_perm.mode |= SHMSEG_WANTED;
		error = cv_wait_sig(&shm_cv[segnum], &shm_lock);
		if (error)
			return error;
		goto again;
	}

	/* Check the permission, segment size and appropriate flag */
	error = ipcperm(cred, &shmseg->shm_perm, mode);
	if (error)
		return error;
	if (SCARG(uap, size) && SCARG(uap, size) > shmseg->shm_segsz)
		return EINVAL;
	if ((SCARG(uap, shmflg) & (IPC_CREAT | IPC_EXCL)) ==
	    (IPC_CREAT | IPC_EXCL))
		return EEXIST;

	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);
	return 0;
}
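/*
 * Note: shmget_existing() returns -1 rather than an errno when no
 * segment matches the key, which lets sys_shmget() below fall through
 * to creating a new segment when IPC_CREAT was requested.
 */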
int
sys_shmget(struct lwp *l, const struct sys_shmget_args *uap, register_t *retval)
{
	/* {
		syscallarg(key_t) key;
		syscallarg(size_t) size;
		syscallarg(int) shmflg;
	} */
	struct shmid_ds *shmseg;
	kauth_cred_t cred = l->l_cred;
	key_t key = SCARG(uap, key);
	size_t size;
	int error, mode, segnum;
	bool lockmem;

	mode = SCARG(uap, shmflg) & ACCESSPERMS;
	if (SCARG(uap, shmflg) & _SHM_RMLINGER)
		mode |= SHMSEG_RMLINGER;

	SHMPRINTF(("shmget: key 0x%lx size 0x%x shmflg 0x%x mode 0x%x\n",
	    SCARG(uap, key), SCARG(uap, size), SCARG(uap, shmflg), mode));

	mutex_enter(&shm_lock);
	/* In case of reallocation, we will wait for completion */
	while (__predict_false(shm_realloc_state))
		cv_wait(&shm_realloc_cv, &shm_lock);

	if (key != IPC_PRIVATE) {
		error = shmget_existing(l, uap, mode, retval);
		if (error != -1) {
			mutex_exit(&shm_lock);
			return error;
		}
		if ((SCARG(uap, shmflg) & IPC_CREAT) == 0) {
			mutex_exit(&shm_lock);
			return ENOENT;
		}
	}
	error = 0;

	/*
	 * Check for the limits.
	 */
	size = SCARG(uap, size);
	if (size < shminfo.shmmin || size > shminfo.shmmax) {
		mutex_exit(&shm_lock);
		return EINVAL;
	}
	if (shm_nused >= shminfo.shmmni) {
		mutex_exit(&shm_lock);
		return ENOSPC;
	}
	size = (size + PGOFSET) & ~PGOFSET;
	if (shm_committed + btoc(size) > shminfo.shmall) {
		mutex_exit(&shm_lock);
		return ENOMEM;
	}

	/* Find the first available segment */
	if (shm_last_free < 0) {
		for (segnum = 0; segnum < shminfo.shmmni; segnum++)
			if (shmsegs[segnum].shm_perm.mode & SHMSEG_FREE)
				break;
		KASSERT(segnum < shminfo.shmmni);
	} else {
		segnum = shm_last_free;
		shm_last_free = -1;
	}

	/*
	 * Initialize the segment.
	 * We will drop the lock while allocating the memory, thus mark the
	 * segment present, but removed, so that no other thread can take it.
	 * Also, disable reallocation while the lock is dropped.
	 */
	shmseg = &shmsegs[segnum];
	shmseg->shm_perm.mode = SHMSEG_ALLOCATED | SHMSEG_REMOVED;
	shm_committed += btoc(size);
	shm_nused++;
	lockmem = shm_use_phys;
	shm_realloc_disable++;
	mutex_exit(&shm_lock);

	/* Allocate the memory object and lock it if needed */
	shmseg->_shm_internal = uao_create(size, 0);
	if (lockmem) {
		/* Wire the pages and tag it */
		error = uobj_wirepages(shmseg->_shm_internal, 0,
		    round_page(shmseg->shm_segsz));
		if (error) {
			mutex_enter(&shm_lock);
			shm_free_segment(segnum);
			shm_realloc_disable--;
			mutex_exit(&shm_lock);
			return error;
		}
	}

	/*
	 * Note that while the segment is marked, there is no need to hold
	 * the lock while setting it (except for shm_perm.mode).
	 */
	shmseg->shm_perm._key = SCARG(uap, key);
	shmseg->shm_perm._seq = (shmseg->shm_perm._seq + 1) & 0x7fff;
	*retval = IXSEQ_TO_IPCID(segnum, shmseg->shm_perm);

	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = kauth_cred_geteuid(cred);
	shmseg->shm_perm.cgid = shmseg->shm_perm.gid = kauth_cred_getegid(cred);
	shmseg->shm_segsz = SCARG(uap, size);
	shmseg->shm_cpid = l->l_proc->p_pid;
	shmseg->shm_lpid = shmseg->shm_nattch = 0;
	shmseg->shm_atime = shmseg->shm_dtime = 0;
	shmseg->shm_ctime = time_second;

	/*
	 * Segment is initialized.
	 * Enter the lock, mark as allocated, and notify waiters (if any).
	 * Also, unmark the state of reallocation.
	 */
	mutex_enter(&shm_lock);
	shmseg->shm_perm.mode = (shmseg->shm_perm.mode & SHMSEG_WANTED) |
	    (mode & (ACCESSPERMS | SHMSEG_RMLINGER)) |
	    SHMSEG_ALLOCATED | (lockmem ? SHMSEG_WIRED : 0);
	if (shmseg->shm_perm.mode & SHMSEG_WANTED) {
		shmseg->shm_perm.mode &= ~SHMSEG_WANTED;
		cv_broadcast(&shm_cv[segnum]);
	}
	shm_realloc_disable--;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	return error;
}

void
shmfork(struct vmspace *vm1, struct vmspace *vm2)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;

	SHMPRINTF(("shmfork %p->%p\n", vm1, vm2));
	mutex_enter(&shm_lock);
	vm2->vm_shm = vm1->vm_shm;
	if (vm1->vm_shm) {
		shmmap_s = (struct shmmap_state *)vm1->vm_shm;
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch++;
		shmmap_s->nrefs++;
	}
	mutex_exit(&shm_lock);
}
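/*
 * Note on shmexit() below: map entries are unlinked and collected while
 * holding shm_lock, but uvm_deallocate() and uao_detach() are called
 * only after the lock is dropped, presumably because those VM
 * operations may sleep and must not run under the IPC mutex.
 */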
void
shmexit(struct vmspace *vm)
{
	struct shmmap_state *shmmap_s;
	struct shmmap_entry *shmmap_se;
	struct uvm_object **uobj;
	size_t *size;
	u_int i, n;

	SLIST_HEAD(, shmmap_entry) tmp_entries;

	mutex_enter(&shm_lock);
	shmmap_s = (struct shmmap_state *)vm->vm_shm;
	if (shmmap_s == NULL) {
		mutex_exit(&shm_lock);
		return;
	}

	vm->vm_shm = NULL;

	if (--shmmap_s->nrefs > 0) {
		SHMPRINTF(("shmexit: vm %p drop ref (%d entries), refs = %d\n",
		    vm, shmmap_s->nitems, shmmap_s->nrefs));
		SLIST_FOREACH(shmmap_se, &shmmap_s->entries, next)
			shmsegs[IPCID_TO_IX(shmmap_se->shmid)].shm_nattch--;
		mutex_exit(&shm_lock);
		return;
	}

	KASSERT(shmmap_s->nrefs == 0);
	n = shmmap_s->nitems;
	SHMPRINTF(("shmexit: vm %p cleanup (%d entries)\n", vm, n));
	mutex_exit(&shm_lock);
	if (n == 0) {
		kmem_free(shmmap_s, sizeof(struct shmmap_state));
		return;
	}

	/* Allocate the arrays */
	SLIST_INIT(&tmp_entries);
	uobj = kmem_zalloc(n * sizeof(void *), KM_SLEEP);
	size = kmem_zalloc(n * sizeof(size_t), KM_SLEEP);

	/* Delete the entries from shm map */
	i = 0;
	mutex_enter(&shm_lock);
	while (!SLIST_EMPTY(&shmmap_s->entries)) {
		struct shmid_ds *shmseg;

		shmmap_se = SLIST_FIRST(&shmmap_s->entries);
		shmseg = &shmsegs[IPCID_TO_IX(shmmap_se->shmid)];
		size[i] = (shmseg->shm_segsz + PGOFSET) & ~PGOFSET;
		uobj[i] = shm_delete_mapping(shmmap_s, shmmap_se);
		SLIST_INSERT_HEAD(&tmp_entries, shmmap_se, next);
		i++;
	}
	mutex_exit(&shm_lock);

	/* Unmap all segments, free the entries */
	i = 0;
	while (!SLIST_EMPTY(&tmp_entries)) {
		KASSERT(i < n);
		shmmap_se = SLIST_FIRST(&tmp_entries);
		SLIST_REMOVE(&tmp_entries, shmmap_se, shmmap_entry, next);
		uvm_deallocate(&vm->vm_map, shmmap_se->va, size[i]);
		if (uobj[i] != NULL)
			uao_detach(uobj[i]);
		pool_put(&shmmap_entry_pool, shmmap_se);
		i++;
	}

	kmem_free(uobj, n * sizeof(void *));
	kmem_free(size, n * sizeof(size_t));
	kmem_free(shmmap_s, sizeof(struct shmmap_state));
}

static int
shmrealloc(int newshmni)
{
	vaddr_t v;
	struct shmid_ds *oldshmsegs, *newshmsegs;
	kcondvar_t *newshm_cv;
	size_t sz;
	int i, lsegid;

	if (newshmni < 1)
		return EINVAL;

	/* Allocate new memory area */
	sz = ALIGN(newshmni * sizeof(struct shmid_ds)) +
	    ALIGN(newshmni * sizeof(kcondvar_t));
	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		return ENOMEM;

	mutex_enter(&shm_lock);
	while (shm_realloc_state || shm_realloc_disable)
		cv_wait(&shm_realloc_cv, &shm_lock);

	/*
	 * Get the number of the last used segment.  Fail if we are
	 * trying to reallocate less memory than is currently in use.
	 */
	lsegid = 0;
	for (i = 0; i < shminfo.shmmni; i++)
		if ((shmsegs[i].shm_perm.mode & SHMSEG_FREE) == 0)
			lsegid = i;
	if (lsegid >= newshmni) {
		mutex_exit(&shm_lock);
		uvm_km_free(kernel_map, v, sz, UVM_KMF_WIRED);
		return EBUSY;
	}
	shm_realloc_state = true;

	newshmsegs = (void *)v;
	newshm_cv = (void *)(ALIGN(newshmsegs) +
	    newshmni * sizeof(struct shmid_ds));

	/* Copy all memory to the new area */
	for (i = 0; i < shm_nused; i++)
		(void)memcpy(&newshmsegs[i], &shmsegs[i],
		    sizeof(newshmsegs[0]));

	/* Mark all new segments as free, if there are any */
	for (; i < newshmni; i++) {
		cv_init(&newshm_cv[i], "shmwait");
		newshmsegs[i].shm_perm.mode = SHMSEG_FREE;
		newshmsegs[i].shm_perm._seq = 0;
	}

	oldshmsegs = shmsegs;
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));

	shminfo.shmmni = newshmni;
	shmsegs = newshmsegs;
	shm_cv = newshm_cv;

	/* Reallocation completed - notify all waiters, if any */
	shm_realloc_state = false;
	cv_broadcast(&shm_realloc_cv);
	mutex_exit(&shm_lock);

	uvm_km_free(kernel_map, (vaddr_t)oldshmsegs, sz, UVM_KMF_WIRED);
	return 0;
}

void
shminit(void)
{
	vaddr_t v;
	size_t sz;
	int i;

	mutex_init(&shm_lock, MUTEX_DEFAULT, IPL_NONE);
	pool_init(&shmmap_entry_pool, sizeof(struct shmmap_entry), 0, 0, 0,
	    "shmmp", &pool_allocator_nointr, IPL_NONE);
	cv_init(&shm_realloc_cv, "shmrealc");

	/* Allocate the wired memory for our structures */
	sz = ALIGN(shminfo.shmmni * sizeof(struct shmid_ds)) +
	    ALIGN(shminfo.shmmni * sizeof(kcondvar_t));
	v = uvm_km_alloc(kernel_map, round_page(sz), 0,
	    UVM_KMF_WIRED|UVM_KMF_ZERO);
	if (v == 0)
		panic("sysv_shm: cannot allocate memory");
	shmsegs = (void *)v;
	shm_cv = (void *)(ALIGN(shmsegs) +
	    shminfo.shmmni * sizeof(struct shmid_ds));

	shminfo.shmmax *= PAGE_SIZE;

	for (i = 0; i < shminfo.shmmni; i++) {
		cv_init(&shm_cv[i], "shmwait");
		shmsegs[i].shm_perm.mode = SHMSEG_FREE;
		shmsegs[i].shm_perm._seq = 0;
	}
	shm_last_free = 0;
	shm_nused = 0;
	shm_committed = 0;
	shm_realloc_disable = 0;
	shm_realloc_state = false;
}
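/*
 * The handlers below back the writable kern.ipc sysctl knobs, e.g.
 * (illustrative userland usage):
 *
 *	sysctl -w kern.ipc.shmmni=256		// resizes via shmrealloc()
 *	sysctl -w kern.ipc.shmmaxpgs=32768	// also rescales shmmax
 */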
static int
sysctl_ipc_shmmni(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmmni;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	sysctl_unlock();
	error = shmrealloc(newsize);
	sysctl_relock();
	return error;
}

static int
sysctl_ipc_shmmaxpgs(SYSCTLFN_ARGS)
{
	int newsize, error;
	struct sysctlnode node;
	node = *rnode;
	node.sysctl_data = &newsize;

	newsize = shminfo.shmall;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return error;

	if (newsize < 1)
		return EINVAL;

	shminfo.shmall = newsize;
	shminfo.shmmax = shminfo.shmall * PAGE_SIZE;

	return 0;
}

SYSCTL_SETUP(sysctl_ipc_shm_setup, "sysctl kern.ipc subtree setup")
{

	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "kern", NULL,
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "ipc",
		SYSCTL_DESCR("SysV IPC options"),
		NULL, 0, NULL, 0,
		CTL_KERN, KERN_SYSVIPC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READONLY,
		CTLTYPE_INT, "shmmax",
		SYSCTL_DESCR("Max shared memory segment size in bytes"),
		NULL, 0, &shminfo.shmmax, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAX, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmmni",
		SYSCTL_DESCR("Max number of shared memory identifiers"),
		sysctl_ipc_shmmni, 0, &shminfo.shmmni, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMNI, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmseg",
		SYSCTL_DESCR("Max shared memory segments per process"),
		NULL, 0, &shminfo.shmseg, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMSEG, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shmmaxpgs",
		SYSCTL_DESCR("Max amount of shared memory in pages"),
		sysctl_ipc_shmmaxpgs, 0, &shminfo.shmall, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMMAXPGS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
		CTLTYPE_INT, "shm_use_phys",
		SYSCTL_DESCR("Enable/disable locking of shared memory in "
		    "physical memory"), NULL, 0, &shm_use_phys, 0,
		CTL_KERN, KERN_SYSVIPC, KERN_SYSVIPC_SHMUSEPHYS, CTL_EOL);
}