1/* 2 * ntfs_page.c - NTFS kernel page operations. 3 * 4 * Copyright (c) 2006-2011 Anton Altaparmakov. All Rights Reserved. 5 * Portions Copyright (c) 2006-2011 Apple Inc. All Rights Reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright notice, 13 * this list of conditions and the following disclaimer in the documentation 14 * and/or other materials provided with the distribution. 15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its 16 * contributors may be used to endorse or promote products derived from this 17 * software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in 31 * full, this file may be redistributed and/or modified under the terms of the 32 * GNU General Public License (GPL) Version 2, in which case the provisions of 33 * that version of the GPL will apply to you instead of the license terms 34 * above. You can obtain a copy of the GPL Version 2 at 35 * http://developer.apple.com/opensource/licenses/gpl-2.txt. 36 */ 37 38#include <sys/errno.h> 39#include <sys/stat.h> 40#include <sys/time.h> 41#include <sys/ucred.h> 42#include <sys/ubc.h> 43#include <sys/vnode.h> 44 45#include <kern/debug.h> 46#include <kern/locks.h> 47 48#include "ntfs_attr.h" 49#include "ntfs_compress.h" 50#include "ntfs_debug.h" 51#include "ntfs_inode.h" 52#include "ntfs_layout.h" 53#include "ntfs_page.h" 54#include "ntfs_types.h" 55#include "ntfs_volume.h" 56 57/** 58 * ntfs_pagein - read a range of pages into memory 59 * @ni: ntfs inode whose data to read into the page range 60 * @attr_ofs: byte offset in the inode at which to start 61 * @size: number of bytes to read from the inode 62 * @upl: page list describing destination page range 63 * @upl_ofs: byte offset into page list at which to start 64 * @flags: flags further describing the pagein request 65 * 66 * Read @size bytes from the ntfs inode @ni, starting at byte offset @attr_ofs 67 * into the inode, into the range of pages specified by the page list @upl, 68 * starting at byte offset @upl_ofs into the page list. 69 * 70 * The @flags further describe the pagein request. The following pagein flags 71 * are currently defined in OSX kernel: 72 * UPL_IOSYNC - Perform synchronous i/o. 73 * UPL_NOCOMMIT - Do not commit/abort the page range. 74 * UPL_NORDAHEAD - Do not perform any speculative read-ahead. 75 * IO_PASSIVE - This is background i/o so do not throttle other i/o. 76 * 77 * Inside the ntfs driver we have the need to perform pageins whilst the inode 78 * is locked for writing (@ni->lock) thus we cheat and set UPL_NESTED_PAGEOUT 79 * in @flags when this is the case. We make sure to clear it in @flags before 80 * calling into the cluster layer so we do not accidentally cause confusion. 81 * 82 * For encrypted attributes we abort for now as we do not support them yet. 83 * 84 * For non-resident, non-compressed attributes we use cluster_pagein_ext() 85 * which deals with both normal and multi sector transfer protected attributes. 86 * 87 * For resident attributes and non-resident, compressed attributes we read the 88 * data ourselves by mapping the page list, and in the resident case, mapping 89 * the mft record, looking up the attribute in it, and copying the requested 90 * data from the mapped attribute into the page list, then unmapping the mft 91 * record, whilst for non-resident, compressed attributes, we get the raw inode 92 * and use it with ntfs_read_compressed() to read and decompress the data into 93 * our mapped page list. We then unmap the page list and finally, if 94 * UPL_NOCOMMIT is not specified, we commit (success) or abort (error) the page 95 * range. 96 * 97 * Return 0 on success and errno on error. 98 * 99 * Note the pages in the page list are marked busy on entry and the busy bit is 100 * cleared when we commit the page range. Thus it is perfectly safe for us to 101 * fill the pages with encrypted or mst protected data and to decrypt or mst 102 * deprotect in place before committing the page range. 103 * 104 * Adapted from cluster_pagein_ext(). 105 * 106 * Locking: - Caller must hold an iocount reference on the vnode of @ni. 107 * - Caller must not hold @ni->lock or if it is held it must be for 108 * reading unless UPL_NESTED_PAGEOUT is set in @flags in which case 109 * the caller must hold @ni->lock for reading or writing. 110 */ 111int ntfs_pagein(ntfs_inode *ni, s64 attr_ofs, unsigned size, upl_t upl, 112 upl_offset_t upl_ofs, int flags) 113{ 114 s64 attr_size; 115 u8 *kaddr; 116 kern_return_t kerr; 117 unsigned to_read; 118 int err; 119 BOOL locked = FALSE; 120 121 ntfs_debug("Entering for mft_no 0x%llx, offset 0x%llx, size 0x%x, " 122 "pagein flags 0x%x, page list offset 0x%llx.", 123 (unsigned long long)ni->mft_no, 124 (unsigned long long)attr_ofs, size, flags, 125 (unsigned long long)upl_ofs); 126 /* 127 * If the caller did not specify any i/o, then we are done. We cannot 128 * issue an abort because we do not have a upl or we do not know its 129 * size. 130 */ 131 if (!upl) { 132 ntfs_error(ni->vol->mp, "NULL page list passed in (error " 133 "EINVAL)."); 134 return EINVAL; 135 } 136 if (S_ISDIR(ni->mode)) { 137 ntfs_error(ni->vol->mp, "Called for directory vnode."); 138 err = EISDIR; 139 goto err; 140 } 141 /* 142 * Protect against changes in initialized_size and thus against 143 * truncation also unless UPL_NESTED_PAGEOUT is set in which case the 144 * caller has already taken @ni->lock for exclusive access. We simply 145 * leave @locked to be FALSE in this case so we do not try to drop the 146 * lock later on. 147 * 148 * If UPL_NESTED_PAGEOUT is set we clear it in @flags to ensure we do 149 * not cause confusion in the cluster layer or the VM. 150 */ 151 if (flags & UPL_NESTED_PAGEOUT) 152 flags &= ~UPL_NESTED_PAGEOUT; 153 else { 154 locked = TRUE; 155 lck_rw_lock_shared(&ni->lock); 156 } 157 /* Do not allow messing with the inode once it has been deleted. */ 158 if (NInoDeleted(ni)) { 159 /* Remove the inode from the name cache. */ 160 cache_purge(ni->vn); 161 err = ENOENT; 162 goto err; 163 } 164retry_pagein: 165 /* 166 * We guarantee that the size in the ubc will be smaller or equal to 167 * the size in the ntfs inode thus no need to check @ni->data_size. 168 */ 169 attr_size = ubc_getsize(ni->vn); 170 /* 171 * Only $DATA attributes can be encrypted/compressed. Index root can 172 * have the flags set but this means to create compressed/encrypted 173 * files, not that the attribute is compressed/encrypted. Note we need 174 * to check for AT_INDEX_ALLOCATION since this is the type of directory 175 * index inodes. 176 */ 177 if (ni->type != AT_INDEX_ALLOCATION) { 178 /* TODO: Deny access to encrypted attributes, just like NT4. */ 179 if (NInoEncrypted(ni)) { 180 if (ni->type != AT_DATA) 181 panic("%s(): Encrypted non-data attribute.\n", 182 __FUNCTION__); 183 ntfs_warning(ni->vol->mp, "Denying access to " 184 "encrypted attribute (EACCES)."); 185 err = EACCES; 186 goto err; 187 } 188 /* Compressed data streams need special handling. */ 189 if (NInoNonResident(ni) && NInoCompressed(ni) && !NInoRaw(ni)) { 190 if (ni->type != AT_DATA) 191 panic("%s(): Compressed non-data attribute.\n", 192 __FUNCTION__); 193 goto compressed; 194 } 195 } 196 /* NInoNonResident() == NInoIndexAllocPresent() */ 197 if (NInoNonResident(ni)) { 198 int (*callback)(buf_t, void *); 199 200 callback = NULL; 201 if (NInoMstProtected(ni) || NInoEncrypted(ni)) 202 callback = ntfs_cluster_iodone; 203 /* Non-resident, possibly mst protected, attribute. */ 204 err = cluster_pagein_ext(ni->vn, upl, upl_ofs, attr_ofs, size, 205 attr_size, flags, callback, NULL); 206 if (!err) 207 ntfs_debug("Done (cluster_pagein_ext())."); 208 else 209 ntfs_error(ni->vol->mp, "Failed (cluster_pagein_ext(), " 210 "error %d).", err); 211 if (locked) 212 lck_rw_unlock_shared(&ni->lock); 213 return err; 214 } 215compressed: 216 /* 217 * The attribute is resident and/or compressed. 218 * 219 * Cannot pagein from a negative offset or if we are starting beyond 220 * the end of the attribute or if the attribute offset is not page 221 * aligned or the size requested is not a multiple of PAGE_SIZE. 222 */ 223 if (attr_ofs < 0 || attr_ofs >= attr_size || attr_ofs & PAGE_MASK_64 || 224 size & PAGE_MASK || upl_ofs & PAGE_MASK) { 225 err = EINVAL; 226 goto err; 227 } 228 to_read = size; 229 attr_size -= attr_ofs; 230 if (to_read > attr_size) 231 to_read = attr_size; 232 /* 233 * We do not need @attr_size any more so reuse it to hold the number of 234 * bytes available in the attribute starting at offset @attr_ofs up to 235 * a maximum of the requested number of bytes rounded up to a multiple 236 * of the system page size. 237 */ 238 attr_size = (to_read + PAGE_MASK) & ~PAGE_MASK; 239 /* Abort any pages outside the end of the attribute. */ 240 if (size > attr_size && !(flags & UPL_NOCOMMIT)) { 241 ubc_upl_abort_range(upl, upl_ofs + attr_size, size - attr_size, 242 UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR); 243 /* Update @size. */ 244 size = attr_size; 245 } 246 /* To access the page list contents, we need to map the page list. */ 247 kerr = ubc_upl_map(upl, (vm_offset_t*)&kaddr); 248 if (kerr != KERN_SUCCESS) { 249 ntfs_error(ni->vol->mp, "ubc_upl_map() failed (error %d).", 250 (int)kerr); 251 err = EIO; 252 goto err; 253 } 254 if (!NInoNonResident(ni)) { 255 /* 256 * Read the data from the resident attribute into the page 257 * list. 258 */ 259 err = ntfs_resident_attr_read(ni, attr_ofs, size, 260 kaddr + upl_ofs); 261 if (err && err != EAGAIN) 262 ntfs_error(ni->vol->mp, "ntfs_resident_attr_read() " 263 "failed (error %d).", err); 264 } else { 265 ntfs_inode *raw_ni; 266 int ioflags; 267 268 /* 269 * Get the raw inode. We take the inode lock shared to protect 270 * against concurrent writers as the compressed data is invalid 271 * whilst a write is in progress. 272 */ 273 err = ntfs_raw_inode_get(ni, LCK_RW_TYPE_SHARED, &raw_ni); 274 if (err) 275 ntfs_error(ni->vol->mp, "Failed to get raw inode " 276 "(error %d).", err); 277 else { 278 if (!NInoRaw(raw_ni)) 279 panic("%s(): Requested raw inode but got " 280 "non-raw one.\n", __FUNCTION__); 281 ioflags = 0; 282 if (vnode_isnocache(ni->vn) || 283 vnode_isnocache(raw_ni->vn)) 284 ioflags |= IO_NOCACHE; 285 if (vnode_isnoreadahead(ni->vn) || 286 vnode_isnoreadahead(raw_ni->vn)) 287 ioflags |= IO_RAOFF; 288 err = ntfs_read_compressed(ni, raw_ni, attr_ofs, size, 289 kaddr + upl_ofs, NULL, ioflags); 290 if (err) 291 ntfs_error(ni->vol->mp, 292 "ntfs_read_compressed() " 293 "failed (error %d).", err); 294 lck_rw_unlock_shared(&raw_ni->lock); 295 (void)vnode_put(raw_ni->vn); 296 } 297 } 298 kerr = ubc_upl_unmap(upl); 299 if (kerr != KERN_SUCCESS) { 300 ntfs_error(ni->vol->mp, "ubc_upl_unmap() failed (error %d).", 301 (int)kerr); 302 if (!err) 303 err = EIO; 304 } 305 if (!err) { 306 if (!(flags & UPL_NOCOMMIT)) { 307 /* Commit the page range we brought up to date. */ 308 ubc_upl_commit_range(upl, upl_ofs, size, 309 UPL_COMMIT_FREE_ON_EMPTY); 310 } 311 ntfs_debug("Done (%s).", !NInoNonResident(ni) ? 312 "ntfs_resident_attr_read()" : 313 "ntfs_read_compressed()"); 314 } else /* if (err) */ { 315 /* 316 * If the attribute was converted to non-resident under our 317 * nose, retry the pagein. 318 * 319 * TODO: This may no longer be possible to happen now that we 320 * lock against changes in initialized size and thus 321 * truncation... Revisit this issue when the write code has 322 * been written and remove the check + goto if appropriate. 323 */ 324 if (err == EAGAIN) 325 goto retry_pagein; 326err: 327 if (!(flags & UPL_NOCOMMIT)) { 328 int upl_flags = UPL_ABORT_FREE_ON_EMPTY; 329 if (err != ENOMEM) 330 upl_flags |= UPL_ABORT_ERROR; 331 ubc_upl_abort_range(upl, upl_ofs, size, upl_flags); 332 } 333 ntfs_error(ni->vol->mp, "Failed (error %d).", err); 334 } 335 if (locked) 336 lck_rw_unlock_shared(&ni->lock); 337 return err; 338} 339 340/** 341 * ntfs_page_map_ext - map a page of a vnode into memory 342 * @ni: ntfs inode of which to map a page 343 * @ofs: byte offset into @ni of which to map a page 344 * @upl: destination page list for the page 345 * @pl: destination array of pages containing the page itself 346 * @kaddr: destination pointer for the address of the mapped page contents 347 * @uptodate: if true return an uptodate page and if false return it as is 348 * @rw: if true we intend to modify the page and if false we do not 349 * 350 * Map the page corresponding to byte offset @ofs into the ntfs inode @ni into 351 * memory and return the page list in @upl, the array of pages containing the 352 * page in @pl and the address of the mapped page contents in @kaddr. 353 * 354 * If @uptodate is true the page is returned uptodate, i.e. if the page is 355 * currently not valid, it will be brought uptodate via a call to ntfs_pagein() 356 * before it is returned. And if @uptodate is false, the page is just returned 357 * ignoring its state. This means the page may or may not be uptodate. 358 * 359 * The caller must set @rw to true if the page is going to be modified and to 360 * false otherwise. 361 * 362 * Note: @ofs must be page aligned. 363 * 364 * Locking: - Caller must hold an iocount reference on the vnode of @ni. 365 * - Caller must hold @ni->lock for reading or writing. 366 * 367 * Return 0 on success and errno on error in which case *@upl is set to NULL. 368 */ 369errno_t ntfs_page_map_ext(ntfs_inode *ni, s64 ofs, upl_t *upl, 370 upl_page_info_array_t *pl, u8 **kaddr, const BOOL uptodate, 371 const BOOL rw) 372{ 373 s64 size; 374 kern_return_t kerr; 375 int abort_flags; 376 errno_t err; 377 378 ntfs_debug("Entering for inode 0x%llx, offset 0x%llx, rw is %s.", 379 (unsigned long long)ni->mft_no, 380 (unsigned long long)ofs, 381 rw ? "true" : "false"); 382 if (ofs & PAGE_MASK) 383 panic("%s() called with non page aligned offset (0x%llx).", 384 __FUNCTION__, (unsigned long long)ofs); 385 lck_spin_lock(&ni->size_lock); 386 size = ubc_getsize(ni->vn); 387 if (size > ni->data_size) 388 size = ni->data_size; 389 lck_spin_unlock(&ni->size_lock); 390 if (ofs > size) { 391 ntfs_error(ni->vol->mp, "Offset 0x%llx is outside the end of " 392 "the attribute (0x%llx).", 393 (unsigned long long)ofs, 394 (unsigned long long)size); 395 err = EINVAL; 396 goto err; 397 } 398 /* Create a page list for the wanted page. */ 399 kerr = ubc_create_upl(ni->vn, ofs, PAGE_SIZE, upl, pl, UPL_SET_LITE | 400 (rw ? UPL_WILL_MODIFY : 0)); 401 if (kerr != KERN_SUCCESS) 402 panic("%s(): Failed to get page (error %d).\n", __FUNCTION__, 403 (int)kerr); 404 /* 405 * If the page is not valid, need to read it in from the vnode now thus 406 * making it valid. 407 * 408 * We set UPL_NESTED_PAGEOUT to let ntfs_pagein() know that we already 409 * have the inode locked (@ni->lock is held by the caller). 410 */ 411 if (uptodate && !upl_valid_page(*pl, 0)) { 412 ntfs_debug("Reading page as it was not valid."); 413 err = ntfs_pagein(ni, ofs, PAGE_SIZE, *upl, 0, UPL_IOSYNC | 414 UPL_NOCOMMIT | UPL_NESTED_PAGEOUT); 415 if (err) { 416 ntfs_error(ni->vol->mp, "Failed to read page (error " 417 "%d).", err); 418 goto pagein_err; 419 } 420 } 421 /* Map the page into the kernel's address space. */ 422 kerr = ubc_upl_map(*upl, (vm_offset_t*)kaddr); 423 if (kerr == KERN_SUCCESS) { 424 ntfs_debug("Done."); 425 return 0; 426 } 427 ntfs_error(ni->vol->mp, "Failed to map page (error %d).", 428 (int)kerr); 429 err = EIO; 430pagein_err: 431 abort_flags = UPL_ABORT_FREE_ON_EMPTY; 432 if (!upl_valid_page(*pl, 0) || 433 (vnode_isnocache(ni->vn) && !upl_dirty_page(*pl, 0))) 434 abort_flags |= UPL_ABORT_DUMP_PAGES; 435 ubc_upl_abort_range(*upl, 0, PAGE_SIZE, abort_flags); 436err: 437 *upl = NULL; 438 return err; 439} 440 441/** 442 * ntfs_page_unmap - unmap a page belonging to a vnode from memory 443 * @ni: ntfs inode to which the page belongs 444 * @upl: page list of the page 445 * @pl: array of pages containing the page itself 446 * @mark_dirty: mark the page dirty 447 * 448 * Unmap the page belonging to the ntfs inode @ni from memory releasing it back 449 * to the vm. 450 * 451 * The page is described by the page list @upl, the array of pages containing 452 * the page @pl and the address of the mapped page contents @kaddr. 453 * 454 * If @mark_dirty is TRUE, tell the vm to mark the page dirty when releasing 455 * the page. 456 * 457 * Locking: Caller must hold an iocount reference on the vnode of @ni. 458 */ 459void ntfs_page_unmap(ntfs_inode *ni, upl_t upl, upl_page_info_array_t pl, 460 const BOOL mark_dirty) 461{ 462 kern_return_t kerr; 463 BOOL was_valid, was_dirty; 464 465 was_valid = upl_valid_page(pl, 0); 466 /* The page dirty bit is only valid if the page was valid. */ 467 was_dirty = (was_valid && upl_dirty_page(pl, 0)); 468 ntfs_debug("Entering for inode 0x%llx, page was %svalid %s %sdirty%s.", 469 (unsigned long long)ni->mft_no, 470 was_valid ? "" : "not ", 471 (int)was_valid ^ (int)was_dirty ? "but" : "and", 472 was_dirty ? "" : "not ", 473 mark_dirty ? ", marking it dirty" : ""); 474 /* Unmap the page from the kernel's address space. */ 475 kerr = ubc_upl_unmap(upl); 476 if (kerr != KERN_SUCCESS) 477 ntfs_warning(ni->vol->mp, "ubc_upl_unmap() failed (error %d).", 478 (int)kerr); 479 /* 480 * If the page was valid and dirty or is being made dirty or if caching 481 * for the vnode is enabled (as it will usually be the case for all 482 * metadata files), commit it thus releasing it into the vm taking care 483 * to preserve the dirty state and marking the page dirty if requested 484 * when committing the page. 485 * 486 * If the page was not valid or was valid but not dirty, it is not 487 * being marked dirty, and caching is disabled on the vnode, dump the 488 * page. 489 */ 490 if (was_dirty || mark_dirty || !vnode_isnocache(ni->vn)) { 491 int commit_flags; 492 493 commit_flags = UPL_COMMIT_FREE_ON_EMPTY | 494 UPL_COMMIT_INACTIVATE; 495 if (!was_valid && !mark_dirty) 496 commit_flags |= UPL_COMMIT_CLEAR_DIRTY; 497 else if (was_dirty || mark_dirty) 498 commit_flags |= UPL_COMMIT_SET_DIRTY; 499 ubc_upl_commit_range(upl, 0, PAGE_SIZE, commit_flags); 500 ntfs_debug("Done (committed page)."); 501 } else { 502 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | 503 UPL_ABORT_FREE_ON_EMPTY); 504 ntfs_debug("Done (dumped page)."); 505 } 506} 507 508/** 509 * ntfs_page_dump - discard a page belonging to a vnode from memory 510 * @ni: ntfs inode to which the page belongs 511 * @upl: page list of the page 512 * @pl: array of pages containing the page itself 513 * 514 * Unmap the page belonging to the ntfs inode @ni from memory throwing it away. 515 * Note that if the page is dirty all changes to the page will be lost as it 516 * will be discarded so use this function with extreme caution. 517 * 518 * The page is described by the page list @upl, the array of pages containing 519 * the page @pl and the address of the mapped page contents @kaddr. 520 * 521 * Locking: Caller must hold an iocount reference on the vnode of @ni. 522 */ 523void ntfs_page_dump(ntfs_inode *ni, upl_t upl, 524 upl_page_info_array_t pl __unused) 525{ 526 kern_return_t kerr; 527 528 ntfs_debug("Entering for inode 0x%llx, page is %svalid, %sdirty.", 529 (unsigned long long)ni->mft_no, 530 upl_valid_page(pl, 0) ? "" : "not ", 531 upl_dirty_page(pl, 0) ? "" : "not "); 532 /* Unmap the page from the kernel's address space. */ 533 kerr = ubc_upl_unmap(upl); 534 if (kerr != KERN_SUCCESS) 535 ntfs_warning(ni->vol->mp, "ubc_upl_unmap() failed (error %d).", 536 (int)kerr); 537 /* Dump the page. */ 538 ubc_upl_abort_range(upl, 0, PAGE_SIZE, UPL_ABORT_DUMP_PAGES | 539 UPL_ABORT_FREE_ON_EMPTY); 540 ntfs_debug("Done."); 541} 542