1/* 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Mach Operating System 30 * Copyright (c) 1987 Carnegie-Mellon University 31 * All rights reserved. The CMU software License Agreement specifies 32 * the terms and conditions for use and redistribution. 33 */ 34/* 35 * File: vnode_pager.c 36 * 37 * "Swap" pager that pages to/from vnodes. Also 38 * handles demand paging from files. 
 *
 */

#include <mach/boolean.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/vnode_internal.h>
#include <sys/namei.h>
#include <sys/mount_internal.h>	/* needs internal due to fhandle_t */
#include <sys/ubc_internal.h>
#include <sys/lock.h>

#include <mach/mach_types.h>
#include <mach/memory_object_types.h>
#include <mach/sdt.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <libkern/libkern.h>

#include <vm/vnode_pager.h>
#include <vm/vm_pageout.h>

#include <kern/assert.h>
#include <sys/kdebug.h>
#include <machine/spl.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>

#include <vm/vm_protos.h>

/*
 * Global statistics counters, incremented (without locking) on the
 * corresponding paths below.
 */
unsigned int vp_pagein=0;	/* pageins where we created the UPL ourselves */
unsigned int vp_pgodirty=0;	/* dirty pages pushed via VNOP_PAGEOUT */
unsigned int vp_pgoclean=0;	/* clean/precious pages invalidated, not written */
unsigned int dp_pgouts=0;	/* Default pager pageouts */
unsigned int dp_pgins=0;	/* Default pager pageins */

/*
 * Return the current size of the file backing this vnode, as tracked
 * by the UBC layer, in VM-object-offset units.
 */
vm_object_offset_t
vnode_pager_get_filesize(struct vnode *vp)
{

	return (vm_object_offset_t) ubc_getsize(vp);
}

/*
 * Copy the path of the vnode into the caller-supplied buffer.
 *
 * On entry *length_p is the buffer size; on success it is updated to
 * the length actually used by vn_getpath().  Returns KERN_FAILURE if
 * the path could not be obtained.
 */
kern_return_t
vnode_pager_get_pathname(
	struct vnode	*vp,
	char		*pathname,
	vm_size_t	*length_p)
{
	int	error, len;

	len = (int) *length_p;
	error = vn_getpath(vp, pathname, &len);
	if (error != 0) {
		return KERN_FAILURE;
	}
	*length_p = (vm_size_t) len;
	return KERN_SUCCESS;
}

/*
 * Return the vnode's cached name pointer (v_name).  The string is not
 * copied; the caller must not hold it past the life of the vnode ref.
 */
kern_return_t
vnode_pager_get_filename(
	struct vnode	*vp,
	const char	**filename)
{
	*filename = vp->v_name;
	return KERN_SUCCESS;
}

/*
 * Return the code-signing blobs the UBC layer has associated with
 * this vnode (opaque to this layer).
 */
kern_return_t
vnode_pager_get_cs_blobs(
	struct vnode	*vp,
	void		**blobs)
{
	*blobs = ubc_get_cs_blobs(vp);
	return KERN_SUCCESS;
}

/*
 * Page out (or invalidate) the pages described by the UPL to the vnode.
 *
 * upl/upl_offset describe the memory to push; f_offset/size describe the
 * corresponding range of the file.  flags carries UPL_* control bits:
 * UPL_VNODE_PAGER distinguishes real-file pageouts/msyncs (where we must
 * find the dirty runs ourselves) from default-pager requests (already
 * sorted); UPL_NOCOMMIT means the caller retains commit/abort
 * responsibility for the UPL.
 *
 * Returns PAGER_SUCCESS or PAGER_ERROR; if errorp is non-NULL the first
 * errno encountered (or 0) is stored through it.
 */
pager_return_t
vnode_pageout(struct vnode *vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		error_ret = 0;	/* first error seen; reported via *errorp */
	daddr64_t	blkno;
	int		isize;
	int		pg_index;
	int		base_index;
	int		offset;		/* current byte offset within the UPL */
	upl_page_info_t *pl;
	vfs_context_t	ctx = vfs_context_current();	/* pager context */

	isize = (int)size;

	if (isize <= 0) {
		result    = PAGER_ERROR;
		error_ret = EINVAL;
		goto out;
	}

	if (UBCINFOEXISTS(vp) == 0) {
		result    = PAGER_ERROR;
		error_ret = EINVAL;

		if (upl && !(flags & UPL_NOCOMMIT))
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
		goto out;
	}
	if ( !(flags & UPL_VNODE_PAGER)) {
		/*
		 * This is a pageout from the default pager,
		 * just go ahead and call vnop_pageout since
		 * it has already sorted out the dirty ranges
		 */
		dp_pgouts++;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      size, 1, 0, 0, 0);

		if ( (error_ret = VNOP_PAGEOUT(vp, upl, upl_offset, (off_t)f_offset,
					       (size_t)size, flags, ctx)) )
			result = PAGER_ERROR;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      size, 1, 0, 0, 0);

		goto out;
	}
	/*
	 * we come here for pageouts to 'real' files and
	 * for msyncs... the upl may not contain any
	 * dirty pages.. it's our responsibility to sort
	 * through it and find the 'runs' of dirty pages
	 * to call VNOP_PAGEOUT on...
	 */
	pl = ubc_upl_pageinfo(upl);

	if (ubc_getsize(vp) == 0) {
		/*
		 * if the file has been effectively deleted, then
		 * we need to go through the UPL and invalidate any
		 * buffer headers we might have that reference any
		 * of its pages
		 */
		for (offset = upl_offset; isize; isize -= PAGE_SIZE, offset += PAGE_SIZE) {
#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)f_offset);
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)f_offset);
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				/* couldn't invalidate the buffer: abort this page, remember the error */
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			f_offset += PAGE_SIZE;
		}
		goto out;
	}
	/*
	 * Ignore any non-present pages at the end of the
	 * UPL so that we aren't looking at a upl that
	 * may already have been freed by the preceding
	 * aborts/completions.
	 */
	base_index = upl_offset / PAGE_SIZE;

	for (pg_index = (upl_offset + isize) / PAGE_SIZE; pg_index > base_index;) {
		if (upl_page_present(pl, --pg_index))
			break;
		if (pg_index == base_index) {
			/*
			 * no pages were returned, so release
			 * our hold on the upl and leave
			 */
			if ( !(flags & UPL_NOCOMMIT))
				ubc_upl_abort_range(upl, upl_offset, isize, UPL_ABORT_FREE_ON_EMPTY);

			goto out;
		}
	}
	/* trim isize to end at the last present page */
	isize = ((pg_index + 1) - base_index) * PAGE_SIZE;

	offset = upl_offset;
	pg_index = base_index;

	while (isize) {
		int	xsize;
		int	num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_DIRTY, so it's possible
			 * to get back empty slots in the UPL
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		if ( !upl_dirty_page(pl, pg_index)) {
			/*
			 * if the page is not dirty and reached here it is
			 * marked precious or it is due to invalidation in
			 * memory_object_lock request as part of truncation
			 * We also get here from vm_object_terminate()
			 * So all you need to do in these
			 * cases is to invalidate incore buffer if it is there
			 * Note we must not sleep here if the buffer is busy - that is
			 * a lock inversion which causes deadlock.
			 */
			vp_pgoclean++;

#if NFSCLIENT
			if (vp->v_tag == VT_NFS)
				/* check with nfs if page is OK to drop */
				error = nfs_buf_page_inval(vp, (off_t)f_offset);
			else
#endif
			{
				blkno = ubc_offtoblk(vp, (off_t)f_offset);
				error = buf_invalblkno(vp, blkno, 0);
			}
			if (error) {
				if ( !(flags & UPL_NOCOMMIT))
					ubc_upl_abort_range(upl, offset, PAGE_SIZE, UPL_ABORT_FREE_ON_EMPTY);
				if (error_ret == 0)
					error_ret = error;
				result = PAGER_ERROR;

			} else if ( !(flags & UPL_NOCOMMIT)) {
				ubc_upl_commit_range(upl, offset, PAGE_SIZE, UPL_COMMIT_FREE_ON_EMPTY);
			}
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		vp_pgodirty++;

		/*
		 * coalesce the run of contiguous dirty pages starting at
		 * pg_index so we can issue a single VNOP_PAGEOUT for it
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_dirty_page(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;

		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_START,
				      xsize, (int)f_offset, 0, 0, 0);

		if ( (error = VNOP_PAGEOUT(vp, upl, (vm_offset_t)offset, (off_t)f_offset,
					   xsize, flags, ctx)) ) {
			if (error_ret == 0)
				error_ret = error;
			result = PAGER_ERROR;
		}
		KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, 1)) | DBG_FUNC_END,
				      xsize, 0, 0, 0, 0);

		f_offset += xsize;
		offset   += xsize;
		isize    -= xsize;
		pg_index += num_of_pages;
	}
out:
	if (errorp)
		*errorp = error_ret;

	return (result);
}


/*
 * Page in the file range [f_offset, f_offset + size) for the vnode.
 *
 * If upl is NULL we create one ourselves (UPL_RET_ONLY_ABSENT) and then
 * own its commit/abort responsibility regardless of UPL_NOCOMMIT; otherwise
 * we operate on the caller's UPL (a default-pager pagein).  The UPL is
 * scanned for runs of present-but-invalid pages, and one VNOP_PAGEIN is
 * issued per run; valid pages are released unchanged.
 *
 * NOTE(review): on the way out this function stores the pager RESULT
 * through *errorp and returns `error` — the opposite convention from
 * vnode_pageout() above (which stores the errno and returns the result).
 * Both values end up equal on all paths here, but the asymmetry is worth
 * confirming against callers before relying on either value.
 */
pager_return_t
vnode_pagein(
	struct vnode		*vp,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_object_offset_t	f_offset,
	vm_size_t		size,
	int			flags,
	int			*errorp)
{
	struct uthread	*ut;
	upl_page_info_t	*pl;
	int		result = PAGER_SUCCESS;
	int		error = 0;
	int		pages_in_upl;
	int		start_pg;
	int		last_pg;
	int		first_pg;
	int		xsize;
	int		must_commit = 1;	/* do we own commit/abort of the UPL? */

	if (flags & UPL_NOCOMMIT)
		must_commit = 0;

	if (UBCINFOEXISTS(vp) == 0) {
		result = PAGER_ERROR;
		error  = PAGER_ERROR;

		if (upl && must_commit)
			ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);

		goto out;
	}
	if (upl == (upl_t)NULL) {
		if (size > (MAX_UPL_SIZE * PAGE_SIZE)) {

			panic("vnode_pagein: size = %x\n", size);

			/*
			 * NOTE(review): only reachable if the panic above is
			 * compiled out — confirm before treating as live code.
			 */
			result = PAGER_ERROR;
			error  = PAGER_ERROR;
			goto out;
		}
		ubc_create_upl(vp, f_offset, size, &upl, &pl, UPL_NOBLOCK | UPL_RET_ONLY_ABSENT | UPL_SET_LITE);

		if (upl == (upl_t)NULL) {

			panic("vnode_pagein: ubc_create_upl failed\n");

			/* NOTE(review): same as above — dead unless panic returns */
			result = PAGER_ABSENT;
			error = PAGER_ABSENT;
			goto out;
		}
		upl_offset = 0;
		first_pg = 0;

		/*
		 * if we get here, we've created the upl and
		 * are responsible for committing/aborting it
		 * regardless of what the caller has passed in
		 */
		flags &= ~UPL_NOCOMMIT;
		must_commit = 1;

		vp_pagein++;
	} else {
		pl = ubc_upl_pageinfo(upl);
		first_pg = upl_offset / PAGE_SIZE;

		dp_pgins++;
	}
	pages_in_upl = size / PAGE_SIZE;
	DTRACE_VM2(pgpgin, int, pages_in_upl, (uint64_t *), NULL);

	/*
	 * before we start marching forward, we must make sure we end on
	 * a present page, otherwise we will be working with a freed
	 * upl
	 */
	for (last_pg = pages_in_upl - 1; last_pg >= first_pg; last_pg--) {
		if (upl_page_present(pl, last_pg))
			break;
		if (last_pg == first_pg) {
			/*
			 * empty UPL, no pages are present
			 */
			if (must_commit)
				ubc_upl_abort_range(upl, upl_offset, size, UPL_ABORT_FREE_ON_EMPTY);
			goto out;
		}
	}
	pages_in_upl = last_pg + 1;
	last_pg = first_pg;

	while (last_pg < pages_in_upl) {
		/*
		 * skip over missing pages...
		 */
		for ( ; last_pg < pages_in_upl; last_pg++) {
			if (upl_page_present(pl, last_pg))
				break;
		}
		/*
		 * skip over 'valid' pages... we don't want to issue I/O for these
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (!upl_valid_page(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			/*
			 * we've found a range of valid pages
			 * if we've got COMMIT responsibility
			 * release this range of pages back to the
			 * cache unchanged (abort leaves them as-is)
			 */
			xsize = (last_pg - start_pg) * PAGE_SIZE;

			if (must_commit)
				ubc_upl_abort_range(upl, start_pg * PAGE_SIZE, xsize, UPL_ABORT_FREE_ON_EMPTY);
		}
		if (last_pg == pages_in_upl)
			/*
			 * we're done... all pages that were present
			 * have either had I/O issued on them or
			 * were aborted unchanged...
			 */
			break;

		if (!upl_page_present(pl, last_pg)) {
			/*
			 * we found a range of valid pages
			 * terminated by a missing page...
			 * bump index to the next page and continue on
			 */
			last_pg++;
			continue;
		}
		/*
		 * scan from the found invalid page looking for a valid
		 * or non-present page before the end of the upl is reached, if we
		 * find one, then it will be the last page of the request to
		 * 'cluster_io'
		 */
		for (start_pg = last_pg; last_pg < pages_in_upl; last_pg++) {
			if (upl_valid_page(pl, last_pg) || !upl_page_present(pl, last_pg))
				break;
		}
		if (last_pg > start_pg) {
			int xoff;
			xsize = (last_pg - start_pg) * PAGE_SIZE;
			xoff  = start_pg * PAGE_SIZE;

			if ( (error = VNOP_PAGEIN(vp, upl, (vm_offset_t) xoff,
						  (off_t)f_offset + xoff,
						  xsize, flags, vfs_context_current())) ) {
				result = PAGER_ERROR;
				error  = PAGER_ERROR;

			}
		}
	}
out:
	if (errorp)
		*errorp = result;

	ut = get_bsdthread_info(current_thread());

	if (ut->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this page fault
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(TRUE);
	}
	return (error);
}

/*
 * Drop the macx_swapon() references held on every backing-store vnode
 * in bs_port_table, clearing each slot as we go.  Called at shutdown.
 */
void
vnode_pager_shutdown(void)
{
	int i;
	vnode_t vp;

	for(i = 0; i < MAX_BACKING_STORE; i++) {
		vp = (vnode_t)(bs_port_table[i]).vp;
		if (vp) {
			(bs_port_table[i]).vp = 0;

			/* get rid of macx_swapon() reference */
			vnode_rele(vp);
		}
	}
}


/*
 * Thin accessor: expose the UPL's internal page-info list via the
 * UPL_GET_INTERNAL_PAGE_LIST() macro.
 */
void *
upl_get_internal_page_list(upl_t upl)
{
	return(UPL_GET_INTERNAL_PAGE_LIST(upl));

}