1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29#include <stdint.h> 30#include <sys/fcntl.h> 31#include <sys/vnode_internal.h> 32#include <sys/vnode.h> 33#include <sys/kauth.h> 34#include <sys/mount_internal.h> 35#include <sys/buf_internal.h> 36#include <kern/debug.h> 37#include <kern/kalloc.h> 38#include <sys/cprotect.h> 39#include <sys/disk.h> 40#include <vm/vm_protos.h> 41#include <vm/vm_pageout.h> 42 43void vm_swapfile_open(const char *path, vnode_t *vp); 44void vm_swapfile_close(uint64_t path, vnode_t vp); 45int vm_swapfile_preallocate(vnode_t vp, uint64_t *size); 46uint64_t vm_swapfile_get_blksize(vnode_t vp); 47uint64_t vm_swapfile_get_transfer_size(vnode_t vp); 48int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags); 49 50void 51vm_swapfile_open(const char *path, vnode_t *vp) 52{ 53 int error = 0; 54 vfs_context_t ctx = vfs_context_current(); 55 56 if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) { 57 printf("Failed to open swap file %d\n", error); 58 *vp = NULL; 59 return; 60 } 61 62 vnode_put(*vp); 63} 64 65uint64_t 66vm_swapfile_get_blksize(vnode_t vp) 67{ 68 return ((uint64_t)vfs_devblocksize(vnode_mount(vp))); 69} 70 71uint64_t 72vm_swapfile_get_transfer_size(vnode_t vp) 73{ 74 return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize); 75} 76 77int unlink1(vfs_context_t, struct nameidata *, int); 78 79void 80vm_swapfile_close(uint64_t path_addr, vnode_t vp) 81{ 82 struct nameidata nd; 83 vfs_context_t context = vfs_context_current(); 84 int error = 0; 85 86 vnode_getwithref(vp); 87 vnode_close(vp, 0, context); 88 89 NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE, 90 path_addr, context); 91 92 error = unlink1(context, &nd, 0); 93} 94 95int 96vm_swapfile_preallocate(vnode_t vp, uint64_t *size) 97{ 98 int error = 0; 99 uint64_t file_size = 0; 100 vfs_context_t ctx = NULL; 101 102 103 ctx = vfs_context_current(); 104 105#if CONFIG_PROTECT 106 { 107#if 0 // <rdar://11771612> 108 109 if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) { 110 if(config_protect_bug) { 111 printf("swap protection class set failed with %d\n", error); 112 } else { 113 panic("swap protection class set failed with %d\n", error); 114 } 115 } 116#endif 117 /* initialize content protection keys manually */ 118 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { 119 printf("Content Protection key failure on swap: %d\n", error); 120 vnode_put(vp); 121 vp = NULL; 122 goto done; 123 } 124 } 125#endif 126 127 error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx); 128 129 if (error) { 130 printf("vnode_setsize for swap files failed: %d\n", error); 131 goto done; 132 } 133 134 error = vnode_size(vp, (off_t*) &file_size, ctx); 135 136 if (error) { 137 printf("vnode_size (new file) for swap file failed: %d\n", error); 138 } 139 140 assert(file_size == *size); 141 142 vnode_lock_spin(vp); 143 SET(vp->v_flag, VSWAP); 144 vnode_unlock(vp); 145done: 146 return error; 147} 148 149int 150vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags) 151{ 152 int error = 0; 153 uint64_t io_size = npages * PAGE_SIZE_64; 154#if 1 155 kern_return_t kr = KERN_SUCCESS; 156 upl_t upl = NULL; 157 unsigned int count = 0; 158 int upl_create_flags = 0, upl_control_flags = 0; 159 upl_size_t upl_size = 0; 160 161 upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE; 162 163#if ENCRYPTED_SWAP 164 upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED; 165#else 166 upl_control_flags = UPL_IOSYNC; 167#endif 168 if ((flags & SWAP_READ) == FALSE) { 169 upl_create_flags |= UPL_COPYOUT_FROM; 170 } 171 172 upl_size = io_size; 173 kr = vm_map_create_upl( kernel_map, 174 start, 175 &upl_size, 176 &upl, 177 NULL, 178 &count, 179 &upl_create_flags); 180 181 if (kr != KERN_SUCCESS || (upl_size != io_size)) { 182 panic("vm_map_create_upl failed with %d\n", kr); 183 } 184 185 if (flags & SWAP_READ) { 186 vnode_pagein(vp, 187 upl, 188 0, 189 offset, 190 io_size, 191 upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK, 192 &error); 193 if (error) { 194#if DEBUG 195 printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); 196#else /* DEBUG */ 197 printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error); 198#endif /* DEBUG */ 199 } 200 201 } else { 202 vnode_pageout(vp, 203 upl, 204 0, 205 offset, 206 io_size, 207 upl_control_flags, 208 &error); 209 if (error) { 210#if DEBUG 211 printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size); 212#else /* DEBUG */ 213 printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error); 214#endif /* DEBUG */ 215 } 216 } 217 return error; 218 219#else /* 1 */ 220 vfs_context_t ctx; 221 ctx = vfs_context_kernel(); 222 223 error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset, 224 UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx)); 225 226 if (error) { 227 printf("vn_rdwr: Swap I/O failed with %d\n", error); 228 } 229 return error; 230#endif /* 1 */ 231} 232 233 234#define MAX_BATCH_TO_TRIM 256 235 236#define ROUTE_ONLY 0x10 /* if corestorage is present, tell it to just pass */ 237 /* the DKIOUNMAP command through w/o acting on it */ 238 /* this is used by the compressed swap system to reclaim empty space */ 239 240 241u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only) 242{ 243 int error = 0; 244 int trim_index = 0; 245 u_int32_t blocksize = 0; 246 struct vnode *devvp; 247 dk_extent_t *extents; 248 dk_unmap_t unmap; 249 _dk_cs_unmap_t cs_unmap; 250 251 if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED)) 252 return (ENOTSUP); 253 254 if (tl == NULL) 255 return (0); 256 257 /* 258 * Get the underlying device vnode and physical block size 259 */ 260 devvp = vp->v_mount->mnt_devvp; 261 blocksize = vp->v_mount->mnt_devblocksize; 262 263 extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); 264 265 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { 266 memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t)); 267 cs_unmap.extents = extents; 268 269 if (route_only == TRUE) 270 cs_unmap.options = ROUTE_ONLY; 271 } else { 272 memset (&unmap, 0, sizeof(dk_unmap_t)); 273 unmap.extents = extents; 274 } 275 276 while (tl) { 277 daddr64_t io_blockno; /* Block number corresponding to the start of the extent */ 278 size_t io_bytecount; /* Number of bytes in current extent for the specified range */ 279 size_t trimmed; 280 size_t remaining_length; 281 off_t current_offset; 282 283 current_offset = tl->tl_offset; 284 remaining_length = tl->tl_length; 285 trimmed = 0; 286 287 /* 288 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single 289 * extent from the blockmap call. Keep looping/going until we are sure we've hit 290 * the whole range or if we encounter an error. 291 */ 292 while (trimmed < tl->tl_length) { 293 /* 294 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the 295 * specified offset. It returns blocks in contiguous chunks, so if the logical range is 296 * broken into multiple extents, it must be called multiple times, increasing the offset 297 * in each call to ensure that the entire range is covered. 298 */ 299 error = VNOP_BLOCKMAP (vp, current_offset, remaining_length, 300 &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL); 301 302 if (error) { 303 goto trim_exit; 304 } 305 306 extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize; 307 extents[trim_index].length = io_bytecount; 308 309 trim_index++; 310 311 if (trim_index == MAX_BATCH_TO_TRIM) { 312 313 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { 314 cs_unmap.extentsCount = trim_index; 315 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); 316 } else { 317 unmap.extentsCount = trim_index; 318 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); 319 } 320 if (error) { 321 goto trim_exit; 322 } 323 trim_index = 0; 324 } 325 trimmed += io_bytecount; 326 current_offset += io_bytecount; 327 remaining_length -= io_bytecount; 328 } 329 tl = tl->tl_next; 330 } 331 if (trim_index) { 332 if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) { 333 cs_unmap.extentsCount = trim_index; 334 error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel()); 335 } else { 336 unmap.extentsCount = trim_index; 337 error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel()); 338 } 339 } 340trim_exit: 341 kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM); 342 343 return error; 344} 345