1/* 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. 
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(dp_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

/*
 * Append a vstruct to the global list and bump the count.
 * Takes and releases the VSL lock internally, so the caller
 * must NOT already hold it.
 */
__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}


/*
 * Remove a vstruct from the global list and drop the count.
 * Unlike vstruct_list_insert(), this does no locking of its own:
 * the caller must already hold the VSL lock (see the VSL_LOCK_TRY
 * dance in dp_memory_object_deallocate()).
 */
__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.
 * In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

/* Event counters; maintained for debugging/inspection only. */
static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */

/*
 * Block until all asynchronous I/O pending on this vstruct has drained,
 * i.e. vs_async_pending has reached zero.
 *
 * Caller must hold VS_LOCK(vs).  The lock is dropped while the thread
 * blocks and reacquired before each re-check; it is held again on return.
 * The wakeup is posted by whoever decrements vs_async_pending to zero
 * (see dp_memory_object_data_return()).
 */
__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{

	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		/* flag that someone is asleep on vs_async_pending */
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}


#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	by a single thread if the receiver is multithreaded and
 *	the interfaces are asynchronous (i.e. sender can generate
 *	more than one request before the first is received in the
 *	pager).  Normally, IPC would generate these numbers in that
 *	case.  But we are trying to avoid using IPC for the in-kernel
 *	scenario.  Since these are actually invoked synchronously
 *	anyway (in-kernel), we can just fake the sequence number
 *	generation here (thus avoiding the dependence on IPC).
 */
/*
 * Acquire the pager in sequence-number (ticket) order: take VS_LOCK,
 * draw a ticket from vs_next_seqno, and wait until the currently
 * served number (vs_seqno) matches it.  Returns with VS_LOCK held.
 */
__private_extern__ void
vs_lock(
	vstruct_t		vs)
{
	mach_port_seqno_t	seqno;

	default_pager_total++;
	VS_LOCK(vs);

	/* draw our ticket; vs_unlock() advances vs_seqno to serve the next */
	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.  If another thread is
 * parked in vs_lock() waiting for its ticket, wake it (the wakeup is
 * issued after VS_UNLOCK so the waiter can make progress immediately).
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t	vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 * The wakeup is posted by vs_finish_read() when the last reader drains.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t	vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is entered unlocked and returns unlocked.
 * If this was the last reader and someone is blocked in
 * vs_wait_for_readers(), wake them.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t	vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t	vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
247 */ 248__private_extern__ void 249vs_wait_for_writers( 250 vstruct_t vs) 251{ 252 while (vs->vs_writers != 0) { 253 default_pager_wait_write++; 254 vs->vs_waiting_write = TRUE; 255 assert_wait(&vs->vs_writers, THREAD_UNINT); 256 VS_UNLOCK(vs); 257 thread_block(THREAD_CONTINUE_NULL); 258 VS_LOCK(vs); 259 } 260 vs_async_wait(vs); 261} 262 263/* This is to be used for the transfer from segment code ONLY */ 264/* The transfer code holds off vs destruction by keeping the */ 265/* vs_async_wait count non-zero. It will not ocnflict with */ 266/* other writers on an async basis because it only writes on */ 267/* a cluster basis into fresh (as of sync time) cluster locations */ 268 269__private_extern__ void 270vs_wait_for_sync_writers( 271 vstruct_t vs) 272{ 273 while (vs->vs_writers != 0) { 274 default_pager_wait_write++; 275 vs->vs_waiting_write = TRUE; 276 assert_wait(&vs->vs_writers, THREAD_UNINT); 277 VS_UNLOCK(vs); 278 thread_block(THREAD_CONTINUE_NULL); 279 VS_LOCK(vs); 280 } 281} 282 283 284/* 285 * Finish a write. Pager is unlocked and returns unlocked. 286 */ 287__private_extern__ void 288vs_finish_write( 289 vstruct_t vs) 290{ 291 VS_LOCK(vs); 292 if (--vs->vs_writers == 0 && vs->vs_waiting_write) { 293 vs->vs_waiting_write = FALSE; 294 VS_UNLOCK(vs); 295 thread_wakeup(&vs->vs_writers); 296 return; 297 } 298 VS_UNLOCK(vs); 299} 300#endif /* PARALLEL */ 301 302vstruct_t 303vs_object_create( 304 dp_size_t size) 305{ 306 vstruct_t vs; 307 308 /* 309 * Allocate a vstruct. If there are any problems, then report them 310 * to the console. 
311 */ 312 vs = ps_vstruct_create(size); 313 if (vs == VSTRUCT_NULL) { 314 dprintf(("vs_object_create: unable to allocate %s\n", 315 "-- either run swapon command or reboot")); 316 return VSTRUCT_NULL; 317 } 318 319 return vs; 320} 321 322#if 0 323void default_pager_add(vstruct_t, boolean_t); /* forward */ 324 325void 326default_pager_add( 327 vstruct_t vs, 328 boolean_t internal) 329{ 330 memory_object_t mem_obj = vs->vs_mem_obj; 331 mach_port_t pset; 332 mach_port_mscount_t sync; 333 mach_port_t previous; 334 kern_return_t kr; 335 static char here[] = "default_pager_add"; 336 337 /* 338 * The port currently has a make-send count of zero, 339 * because either we just created the port or we just 340 * received the port in a memory_object_create request. 341 */ 342 343 if (internal) { 344 /* possibly generate an immediate no-senders notification */ 345 sync = 0; 346 pset = default_pager_internal_set; 347 } else { 348 /* delay notification till send right is created */ 349 sync = 1; 350 pset = default_pager_external_set; 351 } 352 353 ip_lock(mem_obj); /* unlocked in nsrequest below */ 354 ipc_port_make_sonce_locked(mem_obj); 355 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous); 356} 357 358#endif 359 360const struct memory_object_pager_ops default_pager_ops = { 361 dp_memory_object_reference, 362 dp_memory_object_deallocate, 363 dp_memory_object_init, 364 dp_memory_object_terminate, 365 dp_memory_object_data_request, 366 dp_memory_object_data_return, 367 dp_memory_object_data_initialize, 368 dp_memory_object_data_unlock, 369 dp_memory_object_synchronize, 370 dp_memory_object_map, 371 dp_memory_object_last_unmap, 372 dp_memory_object_data_reclaim, 373 "default pager" 374}; 375 376kern_return_t 377dp_memory_object_init( 378 memory_object_t mem_obj, 379 memory_object_control_t control, 380 __unused memory_object_cluster_size_t pager_page_size) 381{ 382 vstruct_t vs; 383 384 assert(pager_page_size == vm_page_size); 385 386 memory_object_control_reference(control); 
387 388 vs_lookup(mem_obj, vs); 389 vs_lock(vs); 390 391 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL) 392 Panic("bad request"); 393 394 vs->vs_control = control; 395 vs_unlock(vs); 396 397 return KERN_SUCCESS; 398} 399 400kern_return_t 401dp_memory_object_synchronize( 402 memory_object_t mem_obj, 403 memory_object_offset_t offset, 404 memory_object_size_t length, 405 __unused vm_sync_t flags) 406{ 407 vstruct_t vs; 408 409 vs_lookup(mem_obj, vs); 410 vs_lock(vs); 411 vs_unlock(vs); 412 413 memory_object_synchronize_completed(vs->vs_control, offset, length); 414 415 return KERN_SUCCESS; 416} 417 418kern_return_t 419dp_memory_object_map( 420 __unused memory_object_t mem_obj, 421 __unused vm_prot_t prot) 422{ 423 panic("dp_memory_object_map"); 424 return KERN_FAILURE; 425} 426 427kern_return_t 428dp_memory_object_last_unmap( 429 __unused memory_object_t mem_obj) 430{ 431 panic("dp_memory_object_last_unmap"); 432 return KERN_FAILURE; 433} 434 435kern_return_t 436dp_memory_object_data_reclaim( 437 memory_object_t mem_obj, 438 boolean_t reclaim_backing_store) 439{ 440 vstruct_t vs; 441 kern_return_t retval; 442 443 vs_lookup(mem_obj, vs); 444 for (;;) { 445 vs_lock(vs); 446 vs_async_wait(vs); 447 if (!vs->vs_xfer_pending) { 448 break; 449 } 450 } 451 vs->vs_xfer_pending = TRUE; 452 vs_unlock(vs); 453 454 retval = ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store); 455 456 vs_lock(vs); 457 vs->vs_xfer_pending = FALSE; 458 vs_unlock(vs); 459 460 return retval; 461} 462 463kern_return_t 464dp_memory_object_terminate( 465 memory_object_t mem_obj) 466{ 467 memory_object_control_t control; 468 vstruct_t vs; 469 470 /* 471 * control port is a receive right, not a send right. 472 */ 473 474 vs_lookup(mem_obj, vs); 475 vs_lock(vs); 476 477 /* 478 * Wait for read and write requests to terminate. 
479 */ 480 481 vs_wait_for_readers(vs); 482 vs_wait_for_writers(vs); 483 484 /* 485 * After memory_object_terminate both memory_object_init 486 * and a no-senders notification are possible, so we need 487 * to clean up our reference to the memory_object_control 488 * to prepare for a new init. 489 */ 490 491 control = vs->vs_control; 492 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL; 493 494 /* a bit of special case ugliness here. Wakeup any waiting reads */ 495 /* these data requests had to be removed from the seqno traffic */ 496 /* based on a performance bottleneck with large memory objects */ 497 /* the problem will right itself with the new component based */ 498 /* synchronous interface. The new async will be able to return */ 499 /* failure during its sync phase. In the mean time ... */ 500 501 thread_wakeup(&vs->vs_writers); 502 thread_wakeup(&vs->vs_async_pending); 503 504 vs_unlock(vs); 505 506 /* 507 * Now we deallocate our reference on the control. 508 */ 509 memory_object_control_deallocate(control); 510 return KERN_SUCCESS; 511} 512 513void 514dp_memory_object_reference( 515 memory_object_t mem_obj) 516{ 517 vstruct_t vs; 518 519 vs_lookup_safe(mem_obj, vs); 520 if (vs == VSTRUCT_NULL) 521 return; 522 523 VS_LOCK(vs); 524 assert(vs->vs_references > 0); 525 vs->vs_references++; 526 VS_UNLOCK(vs); 527} 528 529void 530dp_memory_object_deallocate( 531 memory_object_t mem_obj) 532{ 533 vstruct_t vs; 534 mach_port_seqno_t seqno; 535 536 /* 537 * Because we don't give out multiple first references 538 * for a memory object, there can't be a race 539 * between getting a deallocate call and creating 540 * a new reference for the object. 
541 */ 542 543 vs_lookup_safe(mem_obj, vs); 544 if (vs == VSTRUCT_NULL) 545 return; 546 547 VS_LOCK(vs); 548 if (--vs->vs_references > 0) { 549 VS_UNLOCK(vs); 550 return; 551 } 552 553 seqno = vs->vs_next_seqno++; 554 while (vs->vs_seqno != seqno) { 555 default_pager_wait_seqno++; 556 vs->vs_waiting_seqno = TRUE; 557 assert_wait(&vs->vs_seqno, THREAD_UNINT); 558 VS_UNLOCK(vs); 559 thread_block(THREAD_CONTINUE_NULL); 560 VS_LOCK(vs); 561 } 562 563 vs_async_wait(vs); /* wait for pending async IO */ 564 565 /* do not delete the vs structure until the referencing pointers */ 566 /* in the vstruct list have been expunged */ 567 568 /* get VSL_LOCK out of order by using TRY mechanism */ 569 while(!VSL_LOCK_TRY()) { 570 VS_UNLOCK(vs); 571 VSL_LOCK(); 572 VSL_UNLOCK(); 573 VS_LOCK(vs); 574 vs_async_wait(vs); /* wait for pending async IO */ 575 } 576 577 578 /* 579 * We shouldn't get a deallocation call 580 * when the kernel has the object cached. 581 */ 582 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL) 583 Panic("bad request"); 584 585 /* 586 * Unlock the pager (though there should be no one 587 * waiting for it). 588 */ 589 VS_UNLOCK(vs); 590 591 /* Lock out paging segment removal for the duration of this */ 592 /* call. We are vulnerable to losing a paging segment we rely */ 593 /* on as soon as we remove ourselves from the VSL and unlock */ 594 595 /* Keep our thread from blocking on attempt to trigger backing */ 596 /* store release */ 597 backing_store_release_trigger_disable += 1; 598 599 /* 600 * Remove the memory object port association, and then 601 * the destroy the port itself. We must remove the object 602 * from the port list before deallocating the pager, 603 * because of default_pager_objects. 
604 */ 605 vstruct_list_delete(vs); 606 VSL_UNLOCK(); 607 608 ps_vstruct_dealloc(vs); 609 610 VSL_LOCK(); 611 backing_store_release_trigger_disable -= 1; 612 if(backing_store_release_trigger_disable == 0) { 613 thread_wakeup((event_t)&backing_store_release_trigger_disable); 614 } 615 VSL_UNLOCK(); 616} 617 618kern_return_t 619dp_memory_object_data_request( 620 memory_object_t mem_obj, 621 memory_object_offset_t offset, 622 memory_object_cluster_size_t length, 623 __unused vm_prot_t protection_required, 624 memory_object_fault_info_t fault_info) 625{ 626 vstruct_t vs; 627 kern_return_t kr = KERN_SUCCESS; 628 629 GSTAT(global_stats.gs_pagein_calls++); 630 631 632 /* CDY at this moment vs_lookup panics when presented with the wrong */ 633 /* port. As we are expanding this pager to support user interfaces */ 634 /* this should be changed to return kern_failure */ 635 vs_lookup(mem_obj, vs); 636 vs_lock(vs); 637 638 /* We are going to relax the strict sequencing here for performance */ 639 /* reasons. 
We can do this because we know that the read and */ 640 /* write threads are different and we rely on synchronization */ 641 /* of read and write requests at the cache memory_object level */ 642 /* break out wait_for_writers, all of this goes away when */ 643 /* we get real control of seqno with the new component interface */ 644 645 if (vs->vs_writers != 0) { 646 /* you can't hold on to the seqno and go */ 647 /* to sleep like that */ 648 vs_unlock(vs); /* bump internal count of seqno */ 649 VS_LOCK(vs); 650 while (vs->vs_writers != 0) { 651 default_pager_wait_write++; 652 vs->vs_waiting_write = TRUE; 653 assert_wait(&vs->vs_writers, THREAD_UNINT); 654 VS_UNLOCK(vs); 655 thread_block(THREAD_CONTINUE_NULL); 656 VS_LOCK(vs); 657 vs_async_wait(vs); 658 } 659 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) { 660 VS_UNLOCK(vs); 661 return KERN_FAILURE; 662 } 663 vs_start_read(vs); 664 VS_UNLOCK(vs); 665 } else { 666 vs_start_read(vs); 667 vs_unlock(vs); 668 } 669 670 /* 671 * Request must be on a page boundary and a multiple of pages. 672 */ 673 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0) 674 Panic("bad alignment"); 675 676 assert((dp_offset_t) offset == offset); 677 kr = pvs_cluster_read(vs, (dp_offset_t) offset, length, fault_info); 678 679 /* Regular data requests have a non-zero length and always return KERN_SUCCESS. 680 Their actual success is determined by the fact that they provide a page or not, 681 i.e whether we call upl_commit() or upl_abort(). A length of 0 means that the 682 caller is only asking if the pager has a copy of that page or not. The answer to 683 that question is provided by the return value. KERN_SUCCESS means that the pager 684 does have that page. 685 */ 686 if(length) { 687 kr = KERN_SUCCESS; 688 } 689 690 vs_finish_read(vs); 691 692 return kr; 693} 694 695/* 696 * memory_object_data_initialize: check whether we already have each page, and 697 * write it if we do not. 
 * The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages */
/* to write back.  As of the writing of this note, this is indeed the case */
/* the kernel writes back one page at a time through this interface */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t		size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	/* register as a writer so readers/terminate wait for us */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

/* Unlock requests are never legal for the default pager. */
kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t		size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}


/*ARGSUSED8*/
/*
 * Page-out request from the kernel's pageout thread.  Because the
 * pageout thread must never block on this pager, the vs lock is taken
 * on a try basis; on contention (or when readers / a transfer are
 * active) the pages are handed back to the kernel via a UPL that is
 * immediately aborted, and the kernel will retry later.
 */
kern_return_t
dp_memory_object_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t			size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int		*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	__unused int	upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread, which must not be */
	/* blocked by read activities.  Therefore the grant of the vs lock */
	/* must be done on a try versus a blocking basis.  The code below */
	/* relies on the fact that the interface is synchronous.  Should */
	/* this interface be again async for some type of pager in the */
	/* future the pages will have to be returned through a separate, */
	/* asynchronous path. */

	vs_lookup(mem_obj, vs);

	default_pager_total++;

	/* might be unreachable if VS_TRY_LOCK is, by definition, always true */
	__unreachable_ok_push
	if(!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		unsigned int	page_list_count = 0;
		/* hand the pages back to the kernel: build a UPL over the
		 * range and abort it so the kernel keeps the pages */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}
	__unreachable_ok_pop

	/* also punt if we are out of seqno order, readers are active, or
	 * a transfer is pending -- same give-back-via-UPL trick */
	if ((vs->vs_seqno != vs->vs_next_seqno++)
	    || (vs->vs_readers)
	    || (vs->vs_xfer_pending)) {
		upl_t		upl;
		unsigned int	page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);


	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t) offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		/* wake threads parked in vs_async_wait() */
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}


	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t		vs;

	assert(dmm == default_pager_object);

	if ((dp_size_t) new_size != new_size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_pager_ops = &default_pager_ops;
	vs->vs_pager_header.io_bits = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
910 */ 911kern_return_t 912default_pager_object_create( 913 default_pager_t default_pager, 914 vm_size_t size, 915 memory_object_t *mem_objp) 916{ 917 vstruct_t vs; 918 919 if (default_pager != default_pager_object) 920 return KERN_INVALID_ARGUMENT; 921 922 if ((dp_size_t) size != size) { 923 /* 32-bit overflow */ 924 return KERN_INVALID_ARGUMENT; 925 } 926 927 vs = vs_object_create((dp_size_t) size); 928 if (vs == VSTRUCT_NULL) 929 return KERN_RESOURCE_SHORTAGE; 930 931 /* 932 * Set up associations between the default pager 933 * and this vstruct structure 934 */ 935 vs->vs_pager_ops = &default_pager_ops; 936 vstruct_list_insert(vs); 937 *mem_objp = vs_to_mem_obj(vs); 938 return KERN_SUCCESS; 939} 940 941kern_return_t 942default_pager_objects( 943 default_pager_t default_pager, 944 default_pager_object_array_t *objectsp, 945 mach_msg_type_number_t *ocountp, 946 mach_port_array_t *portsp, 947 mach_msg_type_number_t *pcountp) 948{ 949 vm_offset_t oaddr = 0; /* memory for objects */ 950 vm_size_t osize = 0; /* current size */ 951 default_pager_object_t * objects; 952 unsigned int opotential = 0; 953 954 vm_map_copy_t pcopy = 0; /* copy handle for pagers */ 955 vm_size_t psize = 0; /* current size */ 956 memory_object_t * pagers; 957 unsigned int ppotential = 0; 958 959 unsigned int actual; 960 unsigned int num_objects; 961 kern_return_t kr; 962 vstruct_t entry; 963 964 if (default_pager != default_pager_object) 965 return KERN_INVALID_ARGUMENT; 966 967 /* 968 * We will send no more than this many 969 */ 970 actual = vstruct_list.vsl_count; 971 972 /* 973 * Out out-of-line port arrays are simply kalloc'ed. 
974 */ 975 psize = vm_map_round_page(actual * sizeof (*pagers), 976 vm_map_page_mask(ipc_kernel_map)); 977 ppotential = (unsigned int) (psize / sizeof (*pagers)); 978 pagers = (memory_object_t *)kalloc(psize); 979 if (0 == pagers) 980 return KERN_RESOURCE_SHORTAGE; 981 982 /* 983 * returned out of line data must be allocated out 984 * the ipc_kernel_map, wired down, filled in, and 985 * then "copied in" as if it had been sent by a 986 * user process. 987 */ 988 osize = vm_map_round_page(actual * sizeof (*objects), 989 vm_map_page_mask(ipc_kernel_map)); 990 opotential = (unsigned int) (osize / sizeof (*objects)); 991 kr = kmem_alloc(ipc_kernel_map, &oaddr, osize); 992 if (KERN_SUCCESS != kr) { 993 kfree(pagers, psize); 994 return KERN_RESOURCE_SHORTAGE; 995 } 996 objects = (default_pager_object_t *)oaddr; 997 998 999 /* 1000 * Now scan the list. 1001 */ 1002 1003 VSL_LOCK(); 1004 1005 num_objects = 0; 1006 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) { 1007 1008 memory_object_t pager; 1009 vm_size_t size; 1010 1011 if ((num_objects >= opotential) || 1012 (num_objects >= ppotential)) { 1013 1014 /* 1015 * This should be rare. In any case, 1016 * we will only miss recent objects, 1017 * because they are added at the end. 1018 */ 1019 break; 1020 } 1021 1022 /* 1023 * Avoid interfering with normal operations 1024 */ 1025 if (!VS_MAP_TRY_LOCK(entry)) 1026 goto not_this_one; 1027 size = ps_vstruct_allocated_size(entry); 1028 VS_MAP_UNLOCK(entry); 1029 1030 VS_LOCK(entry); 1031 1032 /* 1033 * We need a reference for our caller. Adding this 1034 * reference through the linked list could race with 1035 * destruction of the object. If we find the object 1036 * has no references, just give up on it. 
1037 */ 1038 VS_LOCK(entry); 1039 if (entry->vs_references == 0) { 1040 VS_UNLOCK(entry); 1041 goto not_this_one; 1042 } 1043 pager = vs_to_mem_obj(entry); 1044 dp_memory_object_reference(pager); 1045 VS_UNLOCK(entry); 1046 1047 /* the arrays are wired, so no deadlock worries */ 1048 1049 objects[num_objects].dpo_object = (vm_offset_t) entry; 1050 objects[num_objects].dpo_size = size; 1051 pagers [num_objects++] = pager; 1052 continue; 1053 1054 not_this_one: 1055 /* 1056 * Do not return garbage 1057 */ 1058 objects[num_objects].dpo_object = (vm_offset_t) 0; 1059 objects[num_objects].dpo_size = 0; 1060 pagers[num_objects++] = MEMORY_OBJECT_NULL; 1061 1062 } 1063 1064 VSL_UNLOCK(); 1065 1066 /* clear out any excess allocation */ 1067 while (num_objects < opotential) { 1068 objects[--opotential].dpo_object = (vm_offset_t) 0; 1069 objects[opotential].dpo_size = 0; 1070 } 1071 while (num_objects < ppotential) { 1072 pagers[--ppotential] = MEMORY_OBJECT_NULL; 1073 } 1074 1075 kr = vm_map_unwire(ipc_kernel_map, 1076 vm_map_trunc_page(oaddr, 1077 vm_map_page_mask(ipc_kernel_map)), 1078 vm_map_round_page(oaddr + osize, 1079 vm_map_page_mask(ipc_kernel_map)), 1080 FALSE); 1081 assert(KERN_SUCCESS == kr); 1082 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr, 1083 (vm_map_size_t)osize, TRUE, &pcopy); 1084 assert(KERN_SUCCESS == kr); 1085 1086 *objectsp = (default_pager_object_array_t)objects; 1087 *ocountp = num_objects; 1088 *portsp = (mach_port_array_t)pcopy; 1089 *pcountp = num_objects; 1090 1091 return KERN_SUCCESS; 1092} 1093 1094kern_return_t 1095default_pager_object_pages( 1096 default_pager_t default_pager, 1097 mach_port_t memory_object, 1098 default_pager_page_array_t *pagesp, 1099 mach_msg_type_number_t *countp) 1100{ 1101 vm_offset_t addr = 0; /* memory for page offsets */ 1102 vm_size_t size = 0; /* current memory size */ 1103 vm_map_copy_t copy; 1104 default_pager_page_t * pages = 0; 1105 unsigned int potential; 1106 unsigned int actual; 1107 
kern_return_t kr; 1108 memory_object_t object; 1109 1110 if (default_pager != default_pager_object) 1111 return KERN_INVALID_ARGUMENT; 1112 1113 object = (memory_object_t) memory_object; 1114 1115 potential = 0; 1116 for (;;) { 1117 vstruct_t entry; 1118 1119 VSL_LOCK(); 1120 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, 1121 vs_links) { 1122 VS_LOCK(entry); 1123 if (vs_to_mem_obj(entry) == object) { 1124 VSL_UNLOCK(); 1125 goto found_object; 1126 } 1127 VS_UNLOCK(entry); 1128 } 1129 VSL_UNLOCK(); 1130 1131 /* did not find the object */ 1132 if (0 != addr) 1133 kmem_free(ipc_kernel_map, addr, size); 1134 1135 return KERN_INVALID_ARGUMENT; 1136 1137 found_object: 1138 1139 if (!VS_MAP_TRY_LOCK(entry)) { 1140 /* oh well bad luck */ 1141 int wresult; 1142 1143 VS_UNLOCK(entry); 1144 1145 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC); 1146 wresult = thread_block(THREAD_CONTINUE_NULL); 1147 assert(wresult == THREAD_TIMED_OUT); 1148 continue; 1149 } 1150 1151 actual = ps_vstruct_allocated_pages(entry, pages, potential); 1152 VS_MAP_UNLOCK(entry); 1153 VS_UNLOCK(entry); 1154 1155 if (actual <= potential) 1156 break; 1157 1158 /* allocate more memory */ 1159 if (0 != addr) 1160 kmem_free(ipc_kernel_map, addr, size); 1161 1162 size = vm_map_round_page(actual * sizeof (*pages), 1163 vm_map_page_mask(ipc_kernel_map)); 1164 kr = kmem_alloc(ipc_kernel_map, &addr, size); 1165 if (KERN_SUCCESS != kr) 1166 return KERN_RESOURCE_SHORTAGE; 1167 1168 pages = (default_pager_page_t *)addr; 1169 potential = (unsigned int) (size / sizeof (*pages)); 1170 } 1171 1172 /* 1173 * Clear unused memory. 
1174 */ 1175 while (actual < potential) 1176 pages[--potential].dpp_offset = 0; 1177 1178 kr = vm_map_unwire(ipc_kernel_map, 1179 vm_map_trunc_page(addr, 1180 vm_map_page_mask(ipc_kernel_map)), 1181 vm_map_round_page(addr + size, 1182 vm_map_page_mask(ipc_kernel_map)), 1183 FALSE); 1184 assert(KERN_SUCCESS == kr); 1185 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr, 1186 (vm_map_size_t)size, TRUE, ©); 1187 assert(KERN_SUCCESS == kr); 1188 1189 1190 *pagesp = (default_pager_page_array_t)copy; 1191 *countp = actual; 1192 return KERN_SUCCESS; 1193} 1194