/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 *
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	NULL, /* data_reclaim */
	"vnode pager"
};

typedef struct vnode_pager {
	struct ipc_object_header	pager_header;	/* fake ip_kotype()	     */
	memory_object_pager_ops_t pager_ops;	/* == &vnode_pager_ops	     */
	unsigned int		ref_count;	/* reference count	     */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle		     */
} *vnode_pager_t;

#define pager_ikot pager_header.io_bits

ipc_port_t
trigger_name_to_port(			/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(			/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(			/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE		10000


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL		0xffffffff
#define	PAGER_INIT		0x00000001
#define	PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL) == LEVEL) {printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

extern int proc_resetpcontrol(int);

#if DEVELOPMENT || DEBUG
extern unsigned long vm_cs_validated_resets;
#endif

/*
 *	Routine:	mach_macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for the low and
 *		high water marks.
 */
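/*
 * Flags handled below: SWAP_ENCRYPT_ON/OFF, USE_EMERGENCY_SWAP_FILE_FIRST,
 * SWAP_FILE_CREATION_ERROR, HI_WAT_ALERT, LO_WAT_ALERT, PROC_RESUME and
 * SWAP_COMPACT_ENABLE/DISABLE.  Most of these are forwarded to the default
 * pager via default_pager_triggers(); PROC_RESUME reuses hi_water to carry
 * the pid of the process to resume.
 */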
int
mach_macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					&default_pager, 0);
	if(kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
	    ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
		/*
		 * Time to switch to the emergency segment.
		 */
		return default_pager_triggers(default_pager,
					0, 0,
					USE_EMERGENCY_SWAP_FILE_FIRST,
					IP_NULL);
	}

	if (flags & SWAP_FILE_CREATION_ERROR) {
		/*
		 * For some reason, the dynamic pager failed to create a swap file.
		 */
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
					0, 0,
					SWAP_FILE_CREATION_ERROR,
					trigger_port);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}


	if (flags & PROC_RESUME) {

		/*
		 * For this call, hi_water is used to pass in the pid of the process we want to resume
		 * or unthrottle.  This is of course restricted to the superuser (checked inside of
		 * proc_resetpcontrol).
		 */

		return proc_resetpcontrol(hi_water);
	}

	/*
	 * Set the scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	if (flags & HI_WAT_ALERT) {
		thread_precedence_policy_data_t		pre;
		thread_extended_policy_data_t		ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);

		current_thread()->options |= TH_OPT_VMPRIV;
	}

	if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
		return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
	}

	return 0;
}

/*
 *
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space  = current_space();
	if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
						&trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32
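
/*
 * memory_object_control_uiomove:
 *	Copy data between the resident pages of a (non-internal) memory
 *	object and a uio, working in runs of up to MAX_RUN busy pages.
 *	Pages may be marked dirty (forcing code-signing revalidation) and,
 *	if take_reference is set, moved to the tail of the inactive queue
 *	to approximate LRU behavior for read/write accesses.
 */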

int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
		        cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

		        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			        break;


			if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process them now since we can't block on this
				 * page while holding other pages in the BUSY state;
				 * otherwise, wait for the page
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->laundry) {
				dst_page->pageout = FALSE;

				vm_pageout_steal_laundry(dst_page, FALSE);
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

		        if (mark_dirty) {
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
					vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
		        /*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
		        break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

		        dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
			        xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << PAGE_SHIFT) + start_offset), xsize, uio)) )
			        break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
		        dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			if (dst_page->clustered)
				VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}


/*
 *
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t      size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				PAGE_SIZE, "vnode pager structures");
	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);


#if CONFIG_CODE_DECRYPTION
	apple_protect_pager_bootstrap();
#endif	/* CONFIG_CODE_DECRYPTION */
	swapfile_pager_bootstrap();
	return;
}

/*
 *
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 memory_object_cluster_size_t pg_size)
{
	vnode_pager_t   vnode_object;
	kern_return_t   kr;
	memory_object_attr_info_data_t  attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t		data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t		dirty,
	__unused boolean_t		kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_isinuse(
	memory_object_t		mem_obj,
	uint32_t		*isinuse)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_throttle_io_limit(
	memory_object_t		mem_obj,
	uint32_t		*limit)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
	memory_object_t		mem_obj,
	boolean_t		*isSSD)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_name(
	memory_object_t		mem_obj,
	char			*pathname,
	vm_size_t		pathname_len,
	char			*filename,
	vm_size_t		filename_len,
	boolean_t		*truncated_path_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_name(vnode_object->vnode_handle,
				    pathname,
				    pathname_len,
				    filename,
				    filename_len,
				    truncated_path_p);
}

kern_return_t
vnode_pager_get_object_mtime(
	memory_object_t		mem_obj,
	struct timespec		*mtime,
	struct timespec		*cs_mtime)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_mtime(vnode_object->vnode_handle,
				     mtime,
				     cs_mtime);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t	mem_obj,
	memory_object_offset_t	offset,
	int		optype)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 *
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	vnode_pager_t		vnode_object;
	memory_object_offset_t	base_offset;
	vm_size_t		size;
	uint32_t		io_streaming = 0;

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER_BYTES;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
	        size = PAGE_SIZE;

	assert(offset >= base_offset &&
	       offset < base_offset + size);

	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 *
 */
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t		length,
	__unused vm_sync_t		sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_map(
	memory_object_t		mem_obj,
	vm_prot_t		prot)
{
	vnode_pager_t		vnode_object;
	int			ret;
	kern_return_t		kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

kern_return_t
vnode_pager_last_unmap(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}



/*
 * vnode_pager_cluster_write:
 *	Push the given range of a vnode-backed object out to the
 *	filesystem via vnode_pageout(), in chunks of at most
 *	MAX_UPL_TRANSFER_BYTES.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t   *	resid_offset,
	int		     *  io_error,
	int			upl_flags)
{
	vm_size_t	size;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

	        upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
		        upl_flags |= UPL_KEEPCACHED;

	        while (cnt) {
			size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
				      NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
			        if ( (*io_error = errno) )
				        break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
	        vm_object_offset_t      vnode_size;
	        vm_object_offset_t	base_offset;

	        /*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
		        /*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
		        size = MAX_UPL_TRANSFER_BYTES;
		        base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
			        size = round_page(((vm_size_t)(vnode_size - base_offset)));
		} else {
		        /*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
		        base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
	        vnode_pageout(vnode_object->vnode_handle,
			      NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
			      (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
	}
}


/*
 * vnode_pager_cluster_read:
 *	Page in the given range via vnode_pagein().  If vnode_pagein()
 *	reports the page absent, create and abort a UPL covering the
 *	range so any pages are cleaned up, and return KERN_FAILURE to
 *	the fault path.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	base_offset,
	vm_object_offset_t	offset,
	uint32_t		io_streaming,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;
	int		flags = 0;

	assert(! (cnt & PAGE_MASK));

	if (io_streaming)
		flags |= UPL_IOSTREAMING;

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (upl_offset_t) (offset - base_offset),
			    base_offset,
			    (upl_size_t) cnt,
			    flags,
			    &local_error);
/*
	if (kret == PAGER_ABSENT) {
	Need to work out the defs here: 1 corresponds to PAGER_ABSENT as
	defined in bsd/vm/vm_pager.h.  However, we should not be including
	that file here; it is a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->control_handle,
					       base_offset, (upl_size_t) cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 *
 */
void
vnode_pager_release_from_cache(
		int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t  vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	 name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);


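/*
 * fill_procregioninfo:
 *	Fill in proc_regioninfo_internal for the map entry containing
 *	(or, failing that, following) the given address in the task's
 *	map.  If the entry is backed by a vnode, also return the vnode
 *	address and vid.  Returns 1 on success, 0 if no entry was found.
 */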
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t  *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	    task_lock(task);
	    map = task->map;
	    if (map == VM_MAP_NULL)
	    {
			task_unlock(task);
			return(0);
	    }
	    vm_map_reference(map);
	    task_unlock(task);

	    vm_map_lock_read(map);

	    start = address;
	    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	    } else {
		entry = tmp_entry;
	    }

	    start = entry->vme_start;

	    pinfo->pri_offset = entry->offset;
	    pinfo->pri_protection = entry->protection;
	    pinfo->pri_max_protection = entry->max_protection;
	    pinfo->pri_inheritance = entry->inheritance;
	    pinfo->pri_behavior = entry->behavior;
	    pinfo->pri_user_wired_count = entry->user_wired_count;
	    pinfo->pri_user_tag = entry->alias;

	    if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	    } else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	    }


	    extended.protection = entry->protection;
	    extended.user_tag = entry->alias;
	    extended.pages_resident = 0;
	    extended.pages_swapped_out = 0;
	    extended.pages_shared_now_private = 0;
	    extended.pages_dirtied = 0;
	    extended.external_pager = 0;
	    extended.shadow_depth = 0;

	    vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	    if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
	            extended.share_mode = SM_PRIVATE;

	    top.private_pages_resident = 0;
	    top.shared_pages_resident = 0;
	    vm_map_region_top_walk(entry, &top);


	    pinfo->pri_pages_resident = extended.pages_resident;
	    pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	    pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	    pinfo->pri_pages_dirtied = extended.pages_dirtied;
	    pinfo->pri_ref_count = extended.ref_count;
	    pinfo->pri_shadow_depth = extended.shadow_depth;
	    pinfo->pri_share_mode = extended.share_mode;

	    pinfo->pri_private_pages_resident = top.private_pages_resident;
	    pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	    pinfo->pri_obj_id = top.obj_id;

	    pinfo->pri_address = (uint64_t)start;
	    pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	    pinfo->pri_depth = 0;

	    if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	    }

	    vm_map_unlock_read(map);
	    vm_map_deallocate(map);
	    return(1);
}

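/*
 * fill_procregioninfo_onlymappedvnodes:
 *	Like fill_procregioninfo(), but walks forward from the given
 *	address until it finds a vnode-backed mapping; the extended and
 *	top-level page statistics are left zeroed.
 */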
int
fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t  *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	while ((entry != vm_map_to_entry(map))) {
		*vnodeaddr = 0;
		*vid = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {

				pinfo->pri_offset = entry->offset;
				pinfo->pri_protection = entry->protection;
				pinfo->pri_max_protection = entry->max_protection;
				pinfo->pri_inheritance = entry->inheritance;
				pinfo->pri_behavior = entry->behavior;
				pinfo->pri_user_wired_count = entry->user_wired_count;
				pinfo->pri_user_tag = entry->alias;

				if (entry->is_shared)
					pinfo->pri_flags |= PROC_REGION_SHARED;

				pinfo->pri_pages_resident = 0;
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out = 0;
				pinfo->pri_pages_dirtied = 0;
				pinfo->pri_ref_count = 0;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = 0;

				pinfo->pri_private_pages_resident = 0;
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = 0;

				pinfo->pri_address = (uint64_t)entry->vme_start;
				pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return(1);
			}
		}

		/* Keep searching for a vnode-backed mapping */
		entry = entry->vme_next;
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(0);
}

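/*
 * fill_vnodeinfoforaddr:
 *	Walk to the bottom of the entry's shadow chain and, if the
 *	backing object is paged by the vnode pager, return the vnode
 *	address and vid.  Returns 1 on success, 0 otherwise.
 */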
static int
fill_vnodeinfoforaddr(
	vm_map_entry_t			entry,
	uintptr_t * vnodeaddr,
	uint32_t * vid)
{
	vm_object_t	top_object, object;
	memory_object_t memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode (
	memory_object_t		mem_obj,
	uintptr_t * vnodeaddr,
	uint32_t * vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}

#if CONFIG_IOSCHED
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t 	mem_obj,
	uintptr_t 		*devvp)
{
	struct vnode 	*vp;
	uint32_t 	vid;

	if(vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS)
		return (KERN_FAILURE);
	*devvp = (uintptr_t)vnode_mountdevvp(vp);
	if (*devvp)
		return (KERN_SUCCESS);
	return (KERN_FAILURE);
}
#endif

/*
 * Find the underlying vnode object for the given vm_map_entry.  If found, return with the
 * object locked; otherwise return VM_OBJECT_NULL with nothing locked.
 */

vm_object_t
find_vnode_object(
	vm_map_entry_t	entry
)
{
	vm_object_t			top_object, object;
	memory_object_t 		memory_object;
	memory_object_pager_ops_t	pager_ops;

	if (!entry->is_sub_map) {

		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = entry->object.vm_object;

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object && !object->internal && object->pager_ready && !object->terminating &&
			    object->alive) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return NULL.
				 */

				if (pager_ops == &vnode_pager_ops)
					return object;		/* we return with the object locked */
			}

			vm_object_unlock(object);
		}

	}

	return(VM_OBJECT_NULL);
}
