1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <stdint.h>
30#include <sys/fcntl.h>
31#include <sys/vnode_internal.h>
32#include <sys/vnode.h>
33#include <sys/kauth.h>
34#include <sys/mount_internal.h>
35#include <sys/buf_internal.h>
36#include <kern/debug.h>
37#include <kern/kalloc.h>
38#include <sys/cprotect.h>
39#include <sys/disk.h>
40#include <vm/vm_protos.h>
41#include <vm/vm_pageout.h>
42
/*
 * Prototypes for the swap file helpers defined below: open/close of the
 * backing file, preallocation, geometry queries and page-granular I/O.
 */
void vm_swapfile_open(const char *path, vnode_t *vp);
void vm_swapfile_close(uint64_t path, vnode_t vp);
int vm_swapfile_preallocate(vnode_t vp, uint64_t *size);
uint64_t vm_swapfile_get_blksize(vnode_t vp);
uint64_t vm_swapfile_get_transfer_size(vnode_t vp);
int vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags);
49
50void
51vm_swapfile_open(const char *path, vnode_t *vp)
52{
53	int error = 0;
54	vfs_context_t	ctx = vfs_context_current();
55
56	if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE), S_IRUSR | S_IWUSR, 0, vp, ctx))) {
57		printf("Failed to open swap file %d\n", error);
58		*vp = NULL;
59		return;
60	}
61
62	vnode_put(*vp);
63}
64
65uint64_t
66vm_swapfile_get_blksize(vnode_t vp)
67{
68	return ((uint64_t)vfs_devblocksize(vnode_mount(vp)));
69}
70
71uint64_t
72vm_swapfile_get_transfer_size(vnode_t vp)
73{
74	return((uint64_t)vp->v_mount->mnt_vfsstat.f_iosize);
75}
76
/*
 * Local prototype for unlink1(), which is called by vm_swapfile_close()
 * below to remove the swap file by path.
 */
int unlink1(vfs_context_t, struct nameidata *, int);
78
79void
80vm_swapfile_close(uint64_t path_addr, vnode_t vp)
81{
82	struct nameidata nd;
83	vfs_context_t context = vfs_context_current();
84	int error = 0;
85
86	vnode_getwithref(vp);
87	vnode_close(vp, 0, context);
88
89	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE,
90	       path_addr, context);
91
92	error = unlink1(context, &nd, 0);
93}
94
95int
96vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
97{
98	int		error = 0;
99	uint64_t	file_size = 0;
100	vfs_context_t	ctx = NULL;
101
102
103	ctx = vfs_context_current();
104
105#if CONFIG_PROTECT
106	{
107#if 0	// <rdar://11771612>
108
109		if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
110			if(config_protect_bug) {
111				printf("swap protection class set failed with %d\n", error);
112			} else {
113				panic("swap protection class set failed with %d\n", error);
114			}
115		}
116#endif
117		/* initialize content protection keys manually */
118		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
119			printf("Content Protection key failure on swap: %d\n", error);
120			vnode_put(vp);
121			vp = NULL;
122			goto done;
123 		}
124	}
125#endif
126
127	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
128
129	if (error) {
130		printf("vnode_setsize for swap files failed: %d\n", error);
131		goto done;
132	}
133
134	error = vnode_size(vp, (off_t*) &file_size, ctx);
135
136	if (error) {
137		printf("vnode_size (new file) for swap file failed: %d\n", error);
138	}
139
140	assert(file_size == *size);
141
142	vnode_lock_spin(vp);
143	SET(vp->v_flag, VSWAP);
144	vnode_unlock(vp);
145done:
146	return error;
147}
148
149int
150vm_swapfile_io(vnode_t vp, uint64_t offset, uint64_t start, int npages, int flags)
151{
152	int error = 0;
153	uint64_t io_size = npages * PAGE_SIZE_64;
154#if 1
155	kern_return_t	kr = KERN_SUCCESS;
156	upl_t		upl = NULL;
157	unsigned int	count = 0;
158	int		upl_create_flags = 0, upl_control_flags = 0;
159	upl_size_t	upl_size = 0;
160
161	upl_create_flags = UPL_SET_INTERNAL | UPL_SET_LITE;
162
163#if ENCRYPTED_SWAP
164	upl_control_flags = UPL_IOSYNC | UPL_PAGING_ENCRYPTED;
165#else
166	upl_control_flags = UPL_IOSYNC;
167#endif
168	if ((flags & SWAP_READ) == FALSE) {
169		upl_create_flags |= UPL_COPYOUT_FROM;
170	}
171
172	upl_size = io_size;
173	kr = vm_map_create_upl( kernel_map,
174				start,
175				&upl_size,
176				&upl,
177				NULL,
178				&count,
179				&upl_create_flags);
180
181	if (kr != KERN_SUCCESS || (upl_size != io_size)) {
182		panic("vm_map_create_upl failed with %d\n", kr);
183	}
184
185	if (flags & SWAP_READ) {
186		vnode_pagein(vp,
187			      upl,
188			      0,
189			      offset,
190			      io_size,
191			      upl_control_flags | UPL_IGNORE_VALID_PAGE_CHECK,
192			      &error);
193		if (error) {
194#if DEBUG
195			printf("vm_swapfile_io: vnode_pagein failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
196#else /* DEBUG */
197			printf("vm_swapfile_io: vnode_pagein failed with %d.\n", error);
198#endif /* DEBUG */
199		}
200
201	} else {
202		vnode_pageout(vp,
203			      upl,
204			      0,
205			      offset,
206			      io_size,
207			      upl_control_flags,
208			      &error);
209		if (error) {
210#if DEBUG
211			printf("vm_swapfile_io: vnode_pageout failed with %d (vp: %p, offset: 0x%llx, size:%llu)\n", error, vp, offset, io_size);
212#else /* DEBUG */
213			printf("vm_swapfile_io: vnode_pageout failed with %d.\n", error);
214#endif /* DEBUG */
215		}
216	}
217	return error;
218
219#else /* 1 */
220	vfs_context_t ctx;
221	ctx = vfs_context_kernel();
222
223	error = vn_rdwr((flags & SWAP_READ) ? UIO_READ : UIO_WRITE, vp, (caddr_t)start, io_size, offset,
224		UIO_SYSSPACE, IO_SYNC | IO_NODELOCKED | IO_UNIT | IO_NOCACHE | IO_SWAP_DISPATCH, vfs_context_ucred(ctx), (int *) 0, vfs_context_proc(ctx));
225
226	if (error) {
227		printf("vn_rdwr: Swap I/O failed with %d\n", error);
228	}
229	return error;
230#endif /* 1 */
231}
232
233
/*
 * Capacity of the extent array used by vnode_trim_list(); an unmap ioctl is
 * issued each time this many extents have been gathered.
 */
#define MAX_BATCH_TO_TRIM	256

#define ROUTE_ONLY		0x10		/* if CoreStorage is present, tell it to just pass */
                                                /* the DKIOUNMAP command through w/o acting on it; */
                                                /* this is used by the compressed swap system to reclaim empty space */
239
240
241u_int32_t vnode_trim_list (vnode_t vp, struct trim_list *tl, boolean_t route_only)
242{
243	int		error = 0;
244	int		trim_index = 0;
245	u_int32_t	blocksize = 0;
246	struct vnode	*devvp;
247	dk_extent_t	*extents;
248	dk_unmap_t	unmap;
249	_dk_cs_unmap_t	cs_unmap;
250
251	if ( !(vp->v_mount->mnt_ioflags & MNT_IOFLAGS_UNMAP_SUPPORTED))
252		return (ENOTSUP);
253
254	if (tl == NULL)
255		return (0);
256
257	/*
258	 * Get the underlying device vnode and physical block size
259	 */
260	devvp = vp->v_mount->mnt_devvp;
261	blocksize = vp->v_mount->mnt_devblocksize;
262
263	extents = kalloc(sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
264
265	if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
266		memset (&cs_unmap, 0, sizeof(_dk_cs_unmap_t));
267		cs_unmap.extents = extents;
268
269		if (route_only == TRUE)
270			cs_unmap.options = ROUTE_ONLY;
271	} else {
272		memset (&unmap, 0, sizeof(dk_unmap_t));
273		unmap.extents = extents;
274	}
275
276	while (tl) {
277		daddr64_t	io_blockno;	/* Block number corresponding to the start of the extent */
278		size_t		io_bytecount;	/* Number of bytes in current extent for the specified range */
279		size_t		trimmed;
280		size_t		remaining_length;
281		off_t		current_offset;
282
283		current_offset = tl->tl_offset;
284		remaining_length = tl->tl_length;
285		trimmed = 0;
286
287		/*
288		 * We may not get the entire range from tl_offset -> tl_offset+tl_length in a single
289		 * extent from the blockmap call.  Keep looping/going until we are sure we've hit
290		 * the whole range or if we encounter an error.
291		 */
292		while (trimmed < tl->tl_length) {
293			/*
294			 * VNOP_BLOCKMAP will tell us the logical to physical block number mapping for the
295			 * specified offset.  It returns blocks in contiguous chunks, so if the logical range is
296			 * broken into multiple extents, it must be called multiple times, increasing the offset
297			 * in each call to ensure that the entire range is covered.
298			 */
299			error = VNOP_BLOCKMAP (vp, current_offset, remaining_length,
300					       &io_blockno, &io_bytecount, NULL, VNODE_READ, NULL);
301
302			if (error) {
303				goto trim_exit;
304			}
305
306			extents[trim_index].offset = (uint64_t) io_blockno * (u_int64_t) blocksize;
307			extents[trim_index].length = io_bytecount;
308
309			trim_index++;
310
311			if (trim_index == MAX_BATCH_TO_TRIM) {
312
313				if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
314					cs_unmap.extentsCount = trim_index;
315					error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
316				} else {
317					unmap.extentsCount = trim_index;
318					error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
319				}
320				if (error) {
321					goto trim_exit;
322				}
323				trim_index = 0;
324			}
325			trimmed += io_bytecount;
326			current_offset += io_bytecount;
327			remaining_length -= io_bytecount;
328		}
329		tl = tl->tl_next;
330	}
331	if (trim_index) {
332		if (vp->v_mount->mnt_ioflags & MNT_IOFLAGS_CSUNMAP_SUPPORTED) {
333			cs_unmap.extentsCount = trim_index;
334			error = VNOP_IOCTL(devvp, _DKIOCCSUNMAP, (caddr_t)&cs_unmap, 0, vfs_context_kernel());
335		} else {
336			unmap.extentsCount = trim_index;
337			error = VNOP_IOCTL(devvp, DKIOCUNMAP, (caddr_t)&unmap, 0, vfs_context_kernel());
338		}
339	}
340trim_exit:
341	kfree(extents, sizeof(dk_extent_t) * MAX_BATCH_TO_TRIM);
342
343	return error;
344}
345