1/* 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: ipc/ipc_entry.c 60 * Author: Rich Draves 61 * Date: 1989 62 * 63 * Primitive functions to manipulate translation entries. 64 */ 65 66#include <mach_debug.h> 67 68#include <mach/kern_return.h> 69#include <mach/port.h> 70#include <kern/assert.h> 71#include <kern/sched_prim.h> 72#include <kern/zalloc.h> 73#include <kern/misc_protos.h> 74#include <ipc/port.h> 75#include <ipc/ipc_entry.h> 76#include <ipc/ipc_space.h> 77#include <ipc/ipc_object.h> 78#include <ipc/ipc_hash.h> 79#include <ipc/ipc_table.h> 80#include <ipc/ipc_port.h> 81#include <string.h> 82 83/* 84 * Routine: ipc_entry_lookup 85 * Purpose: 86 * Searches for an entry, given its name. 87 * Conditions: 88 * The space must be read or write locked throughout. 89 * The space must be active. 90 */ 91 92ipc_entry_t 93ipc_entry_lookup( 94 ipc_space_t space, 95 mach_port_name_t name) 96{ 97 mach_port_index_t index; 98 ipc_entry_t entry; 99 100 assert(is_active(space)); 101 102 103 index = MACH_PORT_INDEX(name); 104 if (index < space->is_table_size) { 105 entry = &space->is_table[index]; 106 if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) || 107 IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE) 108 entry = IE_NULL; 109 } 110 else { 111 entry = IE_NULL; 112 } 113 114 assert((entry == IE_NULL) || IE_BITS_TYPE(entry->ie_bits)); 115 return entry; 116} 117 118/* 119 * Routine: ipc_entry_get 120 * Purpose: 121 * Tries to allocate an entry out of the space. 122 * Conditions: 123 * The space is write-locked and active throughout. 124 * An object may be locked. Will not allocate memory. 125 * Returns: 126 * KERN_SUCCESS A free entry was found. 127 * KERN_NO_SPACE No entry allocated. 128 */ 129 130kern_return_t 131ipc_entry_get( 132 ipc_space_t space, 133 mach_port_name_t *namep, 134 ipc_entry_t *entryp) 135{ 136 ipc_entry_t table; 137 mach_port_index_t first_free; 138 ipc_entry_t free_entry; 139 140 assert(is_active(space)); 141 142 { 143 table = space->is_table; 144 first_free = table->ie_next; 145 146 if (first_free == 0) 147 return KERN_NO_SPACE; 148 149 assert(first_free < space->is_table_size); 150 free_entry = &table[first_free]; 151 table->ie_next = free_entry->ie_next; 152 } 153 154 /* 155 * Initialize the new entry. We need only 156 * increment the generation number and clear ie_request. 157 */ 158 { 159 mach_port_name_t new_name; 160 mach_port_gen_t gen; 161 162 gen = IE_BITS_NEW_GEN(free_entry->ie_bits); 163 free_entry->ie_bits = gen; 164 free_entry->ie_request = IE_REQ_NONE; 165 166 /* 167 * The new name can't be MACH_PORT_NULL because index 168 * is non-zero. It can't be MACH_PORT_DEAD because 169 * the table isn't allowed to grow big enough. 170 * (See comment in ipc/ipc_table.h.) 171 */ 172 new_name = MACH_PORT_MAKE(first_free, gen); 173 assert(MACH_PORT_VALID(new_name)); 174 *namep = new_name; 175 } 176 177 assert(free_entry->ie_object == IO_NULL); 178 179 *entryp = free_entry; 180 return KERN_SUCCESS; 181} 182 183/* 184 * Routine: ipc_entry_alloc 185 * Purpose: 186 * Allocate an entry out of the space. 187 * Conditions: 188 * The space is not locked before, but it is write-locked after 189 * if the call is successful. May allocate memory. 190 * Returns: 191 * KERN_SUCCESS An entry was allocated. 192 * KERN_INVALID_TASK The space is dead. 193 * KERN_NO_SPACE No room for an entry in the space. 194 * KERN_RESOURCE_SHORTAGE Couldn't allocate memory for an entry. 195 */ 196 197kern_return_t 198ipc_entry_alloc( 199 ipc_space_t space, 200 mach_port_name_t *namep, 201 ipc_entry_t *entryp) 202{ 203 kern_return_t kr; 204 205 is_write_lock(space); 206 207 for (;;) { 208 if (!is_active(space)) { 209 is_write_unlock(space); 210 return KERN_INVALID_TASK; 211 } 212 213 kr = ipc_entry_get(space, namep, entryp); 214 if (kr == KERN_SUCCESS) 215 return kr; 216 217 kr = ipc_entry_grow_table(space, ITS_SIZE_NONE); 218 if (kr != KERN_SUCCESS) 219 return kr; /* space is unlocked */ 220 } 221} 222 223/* 224 * Routine: ipc_entry_alloc_name 225 * Purpose: 226 * Allocates/finds an entry with a specific name. 227 * If an existing entry is returned, its type will be nonzero. 228 * Conditions: 229 * The space is not locked before, but it is write-locked after 230 * if the call is successful. May allocate memory. 231 * Returns: 232 * KERN_SUCCESS Found existing entry with same name. 233 * KERN_SUCCESS Allocated a new entry. 234 * KERN_INVALID_TASK The space is dead. 235 * KERN_RESOURCE_SHORTAGE Couldn't allocate memory. 236 * KERN_FAILURE Couldn't allocate requested name. 237 */ 238 239kern_return_t 240ipc_entry_alloc_name( 241 ipc_space_t space, 242 mach_port_name_t name, 243 ipc_entry_t *entryp) 244{ 245 mach_port_index_t index = MACH_PORT_INDEX(name); 246 mach_port_gen_t gen = MACH_PORT_GEN(name); 247 248 assert(MACH_PORT_VALID(name)); 249 250 251 is_write_lock(space); 252 253 for (;;) { 254 ipc_entry_t entry; 255 256 if (!is_active(space)) { 257 is_write_unlock(space); 258 return KERN_INVALID_TASK; 259 } 260 261 /* 262 * If we are under the table cutoff, 263 * there are usually four cases: 264 * 1) The entry is reserved (index 0) 265 * 2) The entry is inuse, for the same name 266 * 3) The entry is inuse, for a different name 267 * 4) The entry is free 268 * For a task with a "fast" IPC space, we disallow 269 * cases 1) and 3), because ports cannot be renamed. 270 */ 271 if (index < space->is_table_size) { 272 ipc_entry_t table = space->is_table; 273 274 entry = &table[index]; 275 276 if (index == 0) { 277 /* case #1 - the entry is reserved */ 278 assert(!IE_BITS_TYPE(entry->ie_bits)); 279 assert(!IE_BITS_GEN(entry->ie_bits)); 280 is_write_unlock(space); 281 return KERN_FAILURE; 282 } else if (IE_BITS_TYPE(entry->ie_bits)) { 283 if (IE_BITS_GEN(entry->ie_bits) == gen) { 284 /* case #2 -- the entry is inuse, for the same name */ 285 *entryp = entry; 286 return KERN_SUCCESS; 287 } else { 288 /* case #3 -- the entry is inuse, for a different name. */ 289 /* Collisions are not allowed */ 290 is_write_unlock(space); 291 return KERN_FAILURE; 292 } 293 } else { 294 mach_port_index_t free_index, next_index; 295 296 /* 297 * case #4 -- the entry is free 298 * Rip the entry out of the free list. 299 */ 300 301 for (free_index = 0; 302 (next_index = table[free_index].ie_next) 303 != index; 304 free_index = next_index) 305 continue; 306 307 table[free_index].ie_next = 308 table[next_index].ie_next; 309 310 /* mark the previous entry modified - reconstructing the name */ 311 ipc_entry_modified(space, 312 MACH_PORT_MAKE(free_index, 313 IE_BITS_GEN(table[free_index].ie_bits)), 314 &table[free_index]); 315 316 entry->ie_bits = gen; 317 entry->ie_request = IE_REQ_NONE; 318 *entryp = entry; 319 320 assert(entry->ie_object == IO_NULL); 321 return KERN_SUCCESS; 322 } 323 } 324 325 /* 326 * We grow the table so that the name 327 * index fits in the array space. 328 * Because the space will be unlocked, 329 * we must restart. 330 */ 331 kern_return_t kr; 332 kr = ipc_entry_grow_table(space, index); 333 assert(kr != KERN_NO_SPACE); 334 if (kr != KERN_SUCCESS) { 335 /* space is unlocked */ 336 return kr; 337 } 338 continue; 339 } 340} 341 342/* 343 * Routine: ipc_entry_dealloc 344 * Purpose: 345 * Deallocates an entry from a space. 346 * Conditions: 347 * The space must be write-locked throughout. 348 * The space must be active. 349 */ 350 351void 352ipc_entry_dealloc( 353 ipc_space_t space, 354 mach_port_name_t name, 355 ipc_entry_t entry) 356{ 357 ipc_entry_t table; 358 ipc_entry_num_t size; 359 mach_port_index_t index; 360 361 assert(is_active(space)); 362 assert(entry->ie_object == IO_NULL); 363 assert(entry->ie_request == IE_REQ_NONE); 364 365#if 1 366 if (entry->ie_request != IE_REQ_NONE) 367 panic("ipc_entry_dealloc()\n"); 368#endif 369 370 index = MACH_PORT_INDEX(name); 371 table = space->is_table; 372 size = space->is_table_size; 373 374 if ((index < size) && (entry == &table[index])) { 375 assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name)); 376 entry->ie_bits &= IE_BITS_GEN_MASK; 377 entry->ie_next = table->ie_next; 378 table->ie_next = index; 379 } else { 380 /* 381 * Nothing to do. The entry does not match 382 * so there is nothing to deallocate. 383 */ 384 assert(index < size); 385 assert(entry == &table[index]); 386 assert(IE_BITS_GEN(entry->ie_bits) == MACH_PORT_GEN(name)); 387 } 388 ipc_entry_modified(space, name, entry); 389} 390 391/* 392 * Routine: ipc_entry_modified 393 * Purpose: 394 * Note that an entry was modified in a space. 395 * Conditions: 396 * Assumes exclusive write access to the space, 397 * either through a write lock or being the cleaner 398 * on an inactive space. 399 */ 400 401void 402ipc_entry_modified( 403 ipc_space_t space, 404 mach_port_name_t name, 405 __assert_only ipc_entry_t entry) 406{ 407 ipc_entry_t table; 408 ipc_entry_num_t size; 409 mach_port_index_t index; 410 411 index = MACH_PORT_INDEX(name); 412 table = space->is_table; 413 size = space->is_table_size; 414 415 assert(index < size); 416 assert(entry == &table[index]); 417 418 assert(space->is_low_mod <= size); 419 assert(space->is_high_mod < size); 420 421 if (index < space->is_low_mod) 422 space->is_low_mod = index; 423 if (index > space->is_high_mod) 424 space->is_high_mod = index; 425} 426 427#define IPC_ENTRY_GROW_STATS 1 428#if IPC_ENTRY_GROW_STATS 429static uint64_t ipc_entry_grow_count = 0; 430static uint64_t ipc_entry_grow_rescan = 0; 431static uint64_t ipc_entry_grow_rescan_max = 0; 432static uint64_t ipc_entry_grow_rescan_entries = 0; 433static uint64_t ipc_entry_grow_rescan_entries_max = 0; 434static uint64_t ipc_entry_grow_freelist_entries = 0; 435static uint64_t ipc_entry_grow_freelist_entries_max = 0; 436#endif 437 438/* 439 * Routine: ipc_entry_grow_table 440 * Purpose: 441 * Grows the table in a space. 442 * Conditions: 443 * The space must be write-locked and active before. 444 * If successful, the space is also returned locked. 445 * On failure, the space is returned unlocked. 446 * Allocates memory. 447 * Returns: 448 * KERN_SUCCESS Grew the table. 449 * KERN_SUCCESS Somebody else grew the table. 450 * KERN_SUCCESS The space died. 451 * KERN_NO_SPACE Table has maximum size already. 452 * KERN_RESOURCE_SHORTAGE Couldn't allocate a new table. 453 */ 454 455kern_return_t 456ipc_entry_grow_table( 457 ipc_space_t space, 458 ipc_table_elems_t target_size) 459{ 460 ipc_entry_num_t osize, size, nsize, psize; 461 462 ipc_entry_t otable, table; 463 ipc_table_size_t oits, its, nits; 464 mach_port_index_t i, free_index; 465 mach_port_index_t low_mod, hi_mod; 466 ipc_table_index_t sanity; 467#if IPC_ENTRY_GROW_STATS 468 uint64_t rescan_count = 0; 469#endif 470 assert(is_active(space)); 471 472 if (is_growing(space)) { 473 /* 474 * Somebody else is growing the table. 475 * We just wait for them to finish. 476 */ 477 478 is_write_sleep(space); 479 return KERN_SUCCESS; 480 } 481 482 otable = space->is_table; 483 484 its = space->is_table_next; 485 size = its->its_size; 486 487 /* 488 * Since is_table_next points to the next natural size 489 * we can identify the current size entry. 490 */ 491 oits = its - 1; 492 osize = oits->its_size; 493 494 /* 495 * If there is no target size, then the new size is simply 496 * specified by is_table_next. If there is a target 497 * size, then search for the next entry. 498 */ 499 if (target_size != ITS_SIZE_NONE) { 500 if (target_size <= osize) { 501 /* the space is locked */ 502 return KERN_SUCCESS; 503 } 504 505 psize = osize; 506 while ((psize != size) && (target_size > size)) { 507 psize = size; 508 its++; 509 size = its->its_size; 510 } 511 if (psize == size) { 512 is_write_unlock(space); 513 return KERN_NO_SPACE; 514 } 515 } 516 517 if (osize == size) { 518 is_write_unlock(space); 519 return KERN_NO_SPACE; 520 } 521 522 nits = its + 1; 523 nsize = nits->its_size; 524 assert((osize < size) && (size <= nsize)); 525 526 /* 527 * We'll attempt to grow the table. 528 * 529 * Because we will be copying without the space lock, reset 530 * the lowest_mod index to just beyond the end of the current 531 * table. Modification of entries (other than hashes) will 532 * bump this downward, and we only have to reprocess entries 533 * above that mark. Eventually, we'll get done. 534 */ 535 is_start_growing(space); 536 space->is_low_mod = osize; 537 space->is_high_mod = 0; 538#if IPC_ENTRY_GROW_STATS 539 ipc_entry_grow_count++; 540#endif 541 is_write_unlock(space); 542 543 table = it_entries_alloc(its); 544 if (table == IE_NULL) { 545 is_write_lock(space); 546 is_done_growing(space); 547 is_write_unlock(space); 548 thread_wakeup((event_t) space); 549 return KERN_RESOURCE_SHORTAGE; 550 } 551 552 /* initialize new entries (free chain in backwards order) */ 553 for (i = osize; i < size; i++) { 554 table[i].ie_object = IO_NULL; 555 table[i].ie_bits = IE_BITS_GEN_MASK; 556 table[i].ie_index = 0; 557 table[i].ie_next = i + 1; 558 } 559 table[size-1].ie_next = 0; 560 561 /* clear out old entries in new table */ 562 memset((void *)table, 0, osize * sizeof(*table)); 563 564 low_mod = 0; 565 hi_mod = osize - 1; 566 rescan: 567 /* 568 * Within the range of the table that changed, determine what we 569 * have to take action on. For each entry, take a snapshot of the 570 * corresponding entry in the old table (so it won't change 571 * during this iteration). The snapshot may not be self-consistent 572 * (if we caught it in the middle of being changed), so be very 573 * cautious with the values. 574 */ 575 for (i = low_mod; i <= hi_mod; i++) { 576 ipc_entry_t entry = &table[i]; 577 struct ipc_entry osnap = otable[i]; 578 579 if (entry->ie_object != osnap.ie_object || 580 IE_BITS_TYPE(entry->ie_bits) != IE_BITS_TYPE(osnap.ie_bits)) { 581 582 if (entry->ie_object != IO_NULL && 583 IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND) 584 ipc_hash_table_delete(table, size, entry->ie_object, i, entry); 585 586 entry->ie_object = osnap.ie_object; 587 entry->ie_bits = osnap.ie_bits; 588 entry->ie_request = osnap.ie_request; /* or ie_next */ 589 590 if (entry->ie_object != IO_NULL && 591 IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_SEND) 592 ipc_hash_table_insert(table, size, entry->ie_object, i, entry); 593 } else { 594 assert(entry->ie_object == osnap.ie_object); 595 entry->ie_bits = osnap.ie_bits; 596 entry->ie_request = osnap.ie_request; /* or ie_next */ 597 } 598 599 } 600 table[0].ie_next = otable[0].ie_next; /* always rebase the freelist */ 601 602 /* 603 * find the end of the freelist (should be short). But be careful, 604 * the list items can change so only follow through truly free entries 605 * (no problem stopping short in those cases, because we'll rescan). 606 */ 607 free_index = 0; 608 for (sanity = 0; sanity < osize; sanity++) { 609 if (table[free_index].ie_object != IPC_OBJECT_NULL) 610 break; 611 i = table[free_index].ie_next; 612 if (i == 0 || i >= osize) 613 break; 614 free_index = i; 615 } 616#if IPC_ENTRY_GROW_STATS 617 ipc_entry_grow_freelist_entries += sanity; 618 if (sanity > ipc_entry_grow_freelist_entries_max) 619 ipc_entry_grow_freelist_entries_max = sanity; 620#endif 621 622 is_write_lock(space); 623 624 /* 625 * We need to do a wakeup on the space, 626 * to rouse waiting threads. We defer 627 * this until the space is unlocked, 628 * because we don't want them to spin. 629 */ 630 631 if (!is_active(space)) { 632 /* 633 * The space died while it was unlocked. 634 */ 635 636 is_done_growing(space); 637 is_write_unlock(space); 638 thread_wakeup((event_t) space); 639 it_entries_free(its, table); 640 is_write_lock(space); 641 return KERN_SUCCESS; 642 } 643 644 /* If the space changed while unlocked, go back and process the changes */ 645 if (space->is_low_mod < osize) { 646 assert(space->is_high_mod > 0); 647 low_mod = space->is_low_mod; 648 space->is_low_mod = osize; 649 hi_mod = space->is_high_mod; 650 space->is_high_mod = 0; 651 is_write_unlock(space); 652#if IPC_ENTRY_GROW_STATS 653 rescan_count++; 654 if (rescan_count > ipc_entry_grow_rescan_max) 655 ipc_entry_grow_rescan_max = rescan_count; 656 657 ipc_entry_grow_rescan++; 658 ipc_entry_grow_rescan_entries += hi_mod - low_mod + 1; 659 if (hi_mod - low_mod + 1 > ipc_entry_grow_rescan_entries_max) 660 ipc_entry_grow_rescan_entries_max = hi_mod - low_mod + 1; 661#endif 662 goto rescan; 663 } 664 665 /* link new free entries onto the rest of the freelist */ 666 assert(table[free_index].ie_next == 0 && 667 table[free_index].ie_object == IO_NULL); 668 table[free_index].ie_next = osize; 669 670 assert(space->is_table == otable); 671 assert((space->is_table_next == its) || 672 (target_size != ITS_SIZE_NONE)); 673 assert(space->is_table_size == osize); 674 675 space->is_table = table; 676 space->is_table_size = size; 677 space->is_table_next = nits; 678 679 is_done_growing(space); 680 is_write_unlock(space); 681 682 thread_wakeup((event_t) space); 683 684 /* 685 * Now we need to free the old table. 686 */ 687 it_entries_free(oits, otable); 688 is_write_lock(space); 689 690 return KERN_SUCCESS; 691} 692