1/*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/errno.h>
30
31#include <mach/mach_types.h>
32#include <mach/mach_traps.h>
33#include <mach/host_priv.h>
34#include <mach/kern_return.h>
35#include <mach/memory_object_control.h>
36#include <mach/memory_object_types.h>
37#include <mach/port.h>
38#include <mach/policy.h>
39#include <mach/upl.h>
40#include <mach/thread_act.h>
41
42#include <kern/assert.h>
43#include <kern/host.h>
44#include <kern/thread.h>
45
46#include <ipc/ipc_port.h>
47#include <ipc/ipc_space.h>
48
49#include <default_pager/default_pager_types.h>
50#include <default_pager/default_pager_object_server.h>
51
52#include <vm/vm_map.h>
53#include <vm/vm_pageout.h>
54#include <vm/memory_object.h>
55#include <vm/vm_pageout.h>
56#include <vm/vm_protos.h>
57#include <vm/vm_purgeable_internal.h>
58
59
60/* BSD VM COMPONENT INTERFACES */
/* Number of entries recorded in the header of the given map. */
int
get_map_nentries(
	vm_map_t);

/*
 * NOTE(review): no definition of get_map_start() is visible in this part
 * of the file; mach_get_vm_start() is defined instead.  Confirm whether
 * this prototype is still needed.
 */
vm_offset_t
get_map_start(
	vm_map_t);

/*
 * NOTE(review): no definition of get_map_end() is visible in this part
 * of the file; mach_get_vm_end() is defined instead.  Confirm whether
 * this prototype is still needed.
 */
vm_offset_t
get_map_end(
	vm_map_t);
72
73/*
74 *
75 */
76int
77get_map_nentries(
78	vm_map_t map)
79{
80	return(map->hdr.nentries);
81}
82
83mach_vm_offset_t
84mach_get_vm_start(vm_map_t map)
85{
86	return( vm_map_first_entry(map)->vme_start);
87}
88
89mach_vm_offset_t
90mach_get_vm_end(vm_map_t map)
91{
92	return( vm_map_last_entry(map)->vme_end);
93}
94
95/*
96 * BSD VNODE PAGER
97 */
98
/*
 * Operations vector for the vnode pager.  Every vnode_pager structure
 * points at this table through its pager_ops field, and several routines
 * in this file compare a memory object's ops pointer against its address
 * to recognize vnode pagers.
 *
 * The initializer is positional: entries must stay in the member order of
 * struct memory_object_pager_ops, which is declared outside this file.
 */
const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	NULL, /* data_reclaim: not provided by the vnode pager */
	"vnode pager"
};
114
/*
 * Per-vnode pager state.  A memory_object_t handed to the routines in
 * this file is cast directly to vnode_pager_t (see vnode_pager_lookup()),
 * so the leading members must keep this layout.
 */
typedef struct vnode_pager {
	struct ipc_object_header	pager_header;	/* fake ip_kotype()		*/
	memory_object_pager_ops_t pager_ops;	/* == &vnode_pager_ops	     */
	unsigned int		ref_count;	/* reference count	     */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle 	     */
} *vnode_pager_t;

/* Shorthand for the header word that carries the fake kernel-object type. */
#define pager_ikot pager_header.io_bits
124
/* Translate a port name into a port; defined after mach_macx_triggers(). */
ipc_port_t
trigger_name_to_port(			/* forward */
	mach_port_t);

/*
 * Page-in helper.  Arguments, in order: pager, base offset of the
 * cluster, offset of the requested page, io_streaming hint, byte count.
 */
kern_return_t
vnode_pager_cluster_read(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

/*
 * Page-out / msync helper.  Arguments, in order: pager, offset, byte
 * count, optional residual-offset result, optional I/O error result,
 * UPL flags.
 */
void
vnode_pager_cluster_write(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


/* Allocate and initialize a pager structure for a vnode. */
vnode_pager_t
vnode_object_create(			/* forward */
	struct vnode *);

/* Convert a memory object handle back into its vnode pager. */
vnode_pager_t
vnode_pager_lookup(			/* forward */
	memory_object_t);
154
/* Zone backing all struct vnode_pager allocations; created in vnode_pager_bootstrap(). */
zone_t	vnode_pager_zone;


/* Null value for a vnode_pager_t. */
#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
/* Currently only referenced from a commented-out line in vnode_pager_init(). */
#define CLUSTER_SHIFT 	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
/* Multiplied by the structure size to give the zone's maximum size argument. */
#define	MAX_VNODE		10000
165
166
#if DEBUG
/* Bitmask of PAGER_* categories whose PAGER_DEBUG() messages are printed. */
int pagerdebug=0;

#define PAGER_ALL		0xffffffff
#define	PAGER_INIT		0x00000001
#define	PAGER_PAGEIN	0x00000002

/*
 * PAGER_DEBUG(LEVEL, A): print the parenthesized printf argument list A
 * when every bit of LEVEL is set in pagerdebug.
 *
 * The expansion is a single statement (do/while(0)) so that
 * "if (x) PAGER_DEBUG(...); else ..." parses as intended, and the macro
 * arguments are parenthesized so compound LEVEL expressions are safe.
 * Call sites must terminate the invocation with a semicolon.
 */
#define PAGER_DEBUG(LEVEL, A)						\
	do {								\
		if ((pagerdebug & (LEVEL)) == (LEVEL)) {		\
			printf A;					\
		}							\
	} while (0)
#else
/* Non-DEBUG builds: same statement shape, no code generated. */
#define PAGER_DEBUG(LEVEL, A)	do { } while (0)
#endif
178
/* Called by mach_macx_triggers() for PROC_RESUME; the argument passed there is a pid. */
extern int proc_resetpcontrol(int);

#if DEVELOPMENT || DEBUG
/* Incremented each time memory_object_control_uiomove() clears cs_validated on a page. */
extern unsigned long vm_cs_validated_resets;
#endif
184
/*
 *	Routine:	mach_macx_triggers
 *	Function:
 *		Syscall interface to set the call backs for low and
 *		high water marks.
 *
 *		Reads hi_water, low_water, flags and alert_port from
 *		"args" and forwards the requested operations to the
 *		default pager returned by host_default_memory_manager().
 *		Several flag bits may be serviced by a single call;
 *		USE_EMERGENCY_SWAP_FILE_FIRST and PROC_RESUME return
 *		immediately with the result of the call they make.
 *
 *	Returns:
 *		EINVAL if the default pager cannot be obtained, if
 *		mutually exclusive flags are both set, or if alert_port
 *		does not translate to a port.  Otherwise 0, or the value
 *		returned by default_pager_triggers(),
 *		proc_resetpcontrol() or macx_backing_store_compaction().
 */
int
mach_macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					&default_pager, 0);
	if(kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
	    ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
		/* can't have it both ways */
		return EINVAL;
	}

	/* start the default pager the first time through */
	if (default_pager_init_flag == 0) {
               start_def_pager(NULL);
               default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
		/*
		 * Time to switch to the emergency segment.
		 * Note: this returns without looking at any remaining flags.
		 */
		return default_pager_triggers(default_pager,
					0, 0,
					USE_EMERGENCY_SWAP_FILE_FIRST,
					IP_NULL);
	}

	if (flags & SWAP_FILE_CREATION_ERROR) {
		/*
		 * For some reason, the dynamic pager failed to create a swap file.
		 */
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
					0, 0,
					SWAP_FILE_CREATION_ERROR,
					trigger_port);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}


	if (flags & PROC_RESUME) {

		/*
		 * For this call, hi_water is used to pass in the pid of the process we want to resume
		 * or unthrottle.  This is of course restricted to the superuser (checked inside of
		 * proc_resetpcontrol).
		 */

		return proc_resetpcontrol(hi_water);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	if (flags & HI_WAT_ALERT) {
		thread_precedence_policy_data_t		pre;
		thread_extended_policy_data_t		ext;

		/* non-timeshare scheduling at the highest importance value */
		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		/* the results of both thread_policy_set() calls are ignored */
		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);

		current_thread()->options |= TH_OPT_VMPRIV;
	}

	/* at most one of the two compaction bits is set (checked above) */
	if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
		return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
	}

	return 0;
}
333
334/*
335 *
336 */
337ipc_port_t
338trigger_name_to_port(
339	mach_port_t	trigger_name)
340{
341	ipc_port_t	trigger_port;
342	ipc_space_t	space;
343
344	if (trigger_name == 0)
345		return (NULL);
346
347	space  = current_space();
348	if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
349						&trigger_port) != KERN_SUCCESS)
350		return (NULL);
351	return trigger_port;
352}
353
354
/* Copy routine used below; called with a physical address, a byte count and the opaque uio. */
extern int	uiomove64(addr64_t, int, void *);
/* Maximum number of pages gathered (and held busy) per batch. */
#define	MAX_RUN	32

/*
 * Copy data between resident pages of the VM object behind "control"
 * and the caller's uio, working in batches of up to MAX_RUN pages.
 *
 *	control		memory object control identifying the VM object
 *	offset		object offset used to look up the first page
 *	uio		opaque handle passed through to uiomove64()
 *	start_offset	byte offset within the first page
 *	io_requested	number of bytes to move
 *	mark_dirty	non-zero: mark each page dirty (and drop any
 *			code-signing validation) before copying
 *	take_reference	non-zero: allow the pages to be moved on the
 *			paging queues via vm_page_lru()
 *
 * Returns 0 if the control has no VM object, or if mark_dirty is set and
 * the object has a copy object; otherwise the last value returned by
 * uiomove64() (0 if none failed).  A return of 0 does not imply that
 * everything was copied: the loop stops early, still returning 0, when a
 * page is not resident.
 */
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		/* pages spanned by what is left of the request, capped at MAX_RUN */
		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
		        cur_needed = MAX_RUN;

		/* gather a run of consecutive resident pages, marking each busy */
		for (cur_run = 0; cur_run < cur_needed; ) {

		        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			        break;


			if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				/* retry the lookup at the same offset */
				continue;
			}
			if (dst_page->laundry) {
				dst_page->pageout = FALSE;

				vm_pageout_steal_laundry(dst_page, FALSE);
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

		        if (mark_dirty) {
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
                                        vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
		        /*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
		        break;
		/* the copy runs with the object unlocked; the pages stay busy meanwhile */
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

		        dst_page = page_run[i];

			/* copy to the end of this page or the end of the request, whichever comes first */
			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
			        xsize = io_requested;

			/*
			 * NOTE(review): the shift of 12 hard-codes a 4KB page
			 * size instead of using PAGE_SHIFT -- confirm this is
			 * intended.
			 */
			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
			        break;

			io_requested -= xsize;
			/* only the first page of the request starts mid-page */
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		/* release every page of the run, including any not copied after an error */
		for (i = 0; i < cur_run; i++) {
		        dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		/* later batches always satisfy the orig_offset == 0 test above */
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
522
523
524/*
525 *
526 */
527void
528vnode_pager_bootstrap(void)
529{
530	register vm_size_t      size;
531
532	size = (vm_size_t) sizeof(struct vnode_pager);
533	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
534				PAGE_SIZE, "vnode pager structures");
535	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
536	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
537
538
539#if CONFIG_CODE_DECRYPTION
540	apple_protect_pager_bootstrap();
541#endif	/* CONFIG_CODE_DECRYPTION */
542	swapfile_pager_bootstrap();
543	return;
544}
545
546/*
547 *
548 */
549memory_object_t
550vnode_pager_setup(
551	struct vnode	*vp,
552	__unused memory_object_t	pager)
553{
554	vnode_pager_t	vnode_object;
555
556	vnode_object = vnode_object_create(vp);
557	if (vnode_object == VNODE_PAGER_NULL)
558		panic("vnode_pager_setup: vnode_object_create() failed");
559	return((memory_object_t)vnode_object);
560}
561
562/*
563 *
564 */
565kern_return_t
566vnode_pager_init(memory_object_t mem_obj,
567		memory_object_control_t control,
568#if !DEBUG
569		 __unused
570#endif
571		 memory_object_cluster_size_t pg_size)
572{
573	vnode_pager_t   vnode_object;
574	kern_return_t   kr;
575	memory_object_attr_info_data_t  attributes;
576
577
578	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
579
580	if (control == MEMORY_OBJECT_CONTROL_NULL)
581		return KERN_INVALID_ARGUMENT;
582
583	vnode_object = vnode_pager_lookup(mem_obj);
584
585	memory_object_control_reference(control);
586
587	vnode_object->control_handle = control;
588
589	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
590	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
591	attributes.cluster_size = (1 << (PAGE_SHIFT));
592	attributes.may_cache_object = TRUE;
593	attributes.temporary = TRUE;
594
595	kr = memory_object_change_attributes(
596					control,
597					MEMORY_OBJECT_ATTRIBUTE_INFO,
598					(memory_object_info_t) &attributes,
599					MEMORY_OBJECT_ATTR_INFO_COUNT);
600	if (kr != KERN_SUCCESS)
601		panic("vnode_pager_init: memory_object_change_attributes() failed");
602
603	return(KERN_SUCCESS);
604}
605
606/*
607 *
608 */
609kern_return_t
610vnode_pager_data_return(
611        memory_object_t		mem_obj,
612        memory_object_offset_t	offset,
613        memory_object_cluster_size_t		data_cnt,
614        memory_object_offset_t	*resid_offset,
615	int			*io_error,
616	__unused boolean_t		dirty,
617	__unused boolean_t		kernel_copy,
618	int			upl_flags)
619{
620	register vnode_pager_t	vnode_object;
621
622	vnode_object = vnode_pager_lookup(mem_obj);
623
624	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);
625
626	return KERN_SUCCESS;
627}
628
629kern_return_t
630vnode_pager_data_initialize(
631	__unused memory_object_t		mem_obj,
632	__unused memory_object_offset_t	offset,
633	__unused memory_object_cluster_size_t		data_cnt)
634{
635	panic("vnode_pager_data_initialize");
636	return KERN_FAILURE;
637}
638
639kern_return_t
640vnode_pager_data_unlock(
641	__unused memory_object_t		mem_obj,
642	__unused memory_object_offset_t	offset,
643	__unused memory_object_size_t		size,
644	__unused vm_prot_t		desired_access)
645{
646	return KERN_FAILURE;
647}
648
649kern_return_t
650vnode_pager_get_isinuse(
651	memory_object_t		mem_obj,
652	uint32_t		*isinuse)
653{
654	vnode_pager_t	vnode_object;
655
656	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
657		*isinuse = 1;
658		return KERN_INVALID_ARGUMENT;
659	}
660
661	vnode_object = vnode_pager_lookup(mem_obj);
662
663	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
664	return KERN_SUCCESS;
665}
666
667kern_return_t
668vnode_pager_check_hard_throttle(
669	memory_object_t		mem_obj,
670	uint32_t		*limit,
671	uint32_t		hard_throttle)
672{
673	vnode_pager_t	vnode_object;
674
675	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
676		return KERN_INVALID_ARGUMENT;
677
678	vnode_object = vnode_pager_lookup(mem_obj);
679
680	(void)vnode_pager_return_hard_throttle_limit(vnode_object->vnode_handle, limit, hard_throttle);
681	return KERN_SUCCESS;
682}
683
684kern_return_t
685vnode_pager_get_isSSD(
686	memory_object_t		mem_obj,
687	boolean_t		*isSSD)
688{
689	vnode_pager_t	vnode_object;
690
691	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
692		return KERN_INVALID_ARGUMENT;
693
694	vnode_object = vnode_pager_lookup(mem_obj);
695
696	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
697	return KERN_SUCCESS;
698}
699
700kern_return_t
701vnode_pager_get_object_size(
702	memory_object_t		mem_obj,
703	memory_object_offset_t	*length)
704{
705	vnode_pager_t	vnode_object;
706
707	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
708		*length = 0;
709		return KERN_INVALID_ARGUMENT;
710	}
711
712	vnode_object = vnode_pager_lookup(mem_obj);
713
714	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
715	return KERN_SUCCESS;
716}
717
718kern_return_t
719vnode_pager_get_object_pathname(
720	memory_object_t		mem_obj,
721	char			*pathname,
722	vm_size_t		*length_p)
723{
724	vnode_pager_t	vnode_object;
725
726	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
727		return KERN_INVALID_ARGUMENT;
728	}
729
730	vnode_object = vnode_pager_lookup(mem_obj);
731
732	return vnode_pager_get_pathname(vnode_object->vnode_handle,
733					pathname,
734					length_p);
735}
736
737kern_return_t
738vnode_pager_get_object_filename(
739	memory_object_t	mem_obj,
740	const char	**filename)
741{
742	vnode_pager_t	vnode_object;
743
744	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
745		return KERN_INVALID_ARGUMENT;
746	}
747
748	vnode_object = vnode_pager_lookup(mem_obj);
749
750	return vnode_pager_get_filename(vnode_object->vnode_handle,
751					filename);
752}
753
754kern_return_t
755vnode_pager_get_object_cs_blobs(
756	memory_object_t	mem_obj,
757	void		**blobs)
758{
759	vnode_pager_t	vnode_object;
760
761	if (mem_obj == MEMORY_OBJECT_NULL ||
762	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
763		return KERN_INVALID_ARGUMENT;
764	}
765
766	vnode_object = vnode_pager_lookup(mem_obj);
767
768	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
769					blobs);
770}
771
#if CHECK_CS_VALIDATION_BITMAP
/*
 * Forward a validation-bitmap operation ("optype") at "offset" to
 * ubc_cs_check_validation_bitmap() for the vnode behind "mem_obj".
 *
 * Returns KERN_INVALID_ARGUMENT when mem_obj is null or is not a vnode
 * pager; otherwise whatever ubc_cs_check_validation_bitmap() returns.
 */
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t	mem_obj,
	memory_object_offset_t	offset,
        int		optype	)
{
	vnode_pager_t	pager;

	if (mem_obj == MEMORY_OBJECT_NULL) {
		return KERN_INVALID_ARGUMENT;
	}
	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	pager = vnode_pager_lookup(mem_obj);

	return ubc_cs_check_validation_bitmap(pager->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */
790
791/*
792 *
793 */
794kern_return_t
795vnode_pager_data_request(
796	memory_object_t		mem_obj,
797	memory_object_offset_t	offset,
798	__unused memory_object_cluster_size_t	length,
799	__unused vm_prot_t	desired_access,
800	memory_object_fault_info_t	fault_info)
801{
802	vnode_pager_t		vnode_object;
803	memory_object_offset_t	base_offset;
804	vm_size_t		size;
805	uint32_t		io_streaming = 0;
806
807	vnode_object = vnode_pager_lookup(mem_obj);
808
809	size = MAX_UPL_TRANSFER * PAGE_SIZE;
810	base_offset = offset;
811
812	if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
813	        size = PAGE_SIZE;
814
815	assert(offset >= base_offset &&
816	       offset < base_offset + size);
817
818	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
819}
820
821/*
822 *
823 */
824void
825vnode_pager_reference(
826	memory_object_t		mem_obj)
827{
828	register vnode_pager_t	vnode_object;
829	unsigned int		new_ref_count;
830
831	vnode_object = vnode_pager_lookup(mem_obj);
832	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
833	assert(new_ref_count > 1);
834}
835
836/*
837 *
838 */
839void
840vnode_pager_deallocate(
841	memory_object_t		mem_obj)
842{
843	register vnode_pager_t	vnode_object;
844
845	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
846
847	vnode_object = vnode_pager_lookup(mem_obj);
848
849	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
850		if (vnode_object->vnode_handle != NULL) {
851			vnode_pager_vrele(vnode_object->vnode_handle);
852		}
853		zfree(vnode_pager_zone, vnode_object);
854	}
855	return;
856}
857
858/*
859 *
860 */
861kern_return_t
862vnode_pager_terminate(
863#if !DEBUG
864	__unused
865#endif
866	memory_object_t	mem_obj)
867{
868	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
869
870	return(KERN_SUCCESS);
871}
872
873/*
874 *
875 */
876kern_return_t
877vnode_pager_synchronize(
878	memory_object_t		mem_obj,
879	memory_object_offset_t	offset,
880	memory_object_size_t		length,
881	__unused vm_sync_t		sync_flags)
882{
883	register vnode_pager_t	vnode_object;
884
885	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));
886
887	vnode_object = vnode_pager_lookup(mem_obj);
888
889	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);
890
891	return (KERN_SUCCESS);
892}
893
894/*
895 *
896 */
897kern_return_t
898vnode_pager_map(
899	memory_object_t		mem_obj,
900	vm_prot_t		prot)
901{
902	vnode_pager_t		vnode_object;
903	int			ret;
904	kern_return_t		kr;
905
906	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
907
908	vnode_object = vnode_pager_lookup(mem_obj);
909
910	ret = ubc_map(vnode_object->vnode_handle, prot);
911
912	if (ret != 0) {
913		kr = KERN_FAILURE;
914	} else {
915		kr = KERN_SUCCESS;
916	}
917
918	return kr;
919}
920
921kern_return_t
922vnode_pager_last_unmap(
923	memory_object_t		mem_obj)
924{
925	register vnode_pager_t	vnode_object;
926
927	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
928
929	vnode_object = vnode_pager_lookup(mem_obj);
930
931	ubc_unmap(vnode_object->vnode_handle);
932	return KERN_SUCCESS;
933}
934
935
936
937/*
938 *
939 */
940void
941vnode_pager_cluster_write(
942	vnode_pager_t		vnode_object,
943	vm_object_offset_t	offset,
944	vm_size_t		cnt,
945	vm_object_offset_t   *	resid_offset,
946	int		     *  io_error,
947	int			upl_flags)
948{
949	vm_size_t	size;
950	int		errno;
951
952	if (upl_flags & UPL_MSYNC) {
953
954	        upl_flags |= UPL_VNODE_PAGER;
955
956		if ( (upl_flags & UPL_IOSYNC) && io_error)
957		        upl_flags |= UPL_KEEPCACHED;
958
959	        while (cnt) {
960			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */
961
962			assert((upl_size_t) size == size);
963			vnode_pageout(vnode_object->vnode_handle,
964				      NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
965
966			if ( (upl_flags & UPL_KEEPCACHED) ) {
967			        if ( (*io_error = errno) )
968				        break;
969			}
970			cnt    -= size;
971			offset += size;
972		}
973		if (resid_offset)
974			*resid_offset = offset;
975
976	} else {
977	        vm_object_offset_t      vnode_size;
978	        vm_object_offset_t	base_offset;
979
980	        /*
981		 * this is the pageout path
982		 */
983		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
984
985		if (vnode_size > (offset + PAGE_SIZE)) {
986		        /*
987			 * preset the maximum size of the cluster
988			 * and put us on a nice cluster boundary...
989			 * and then clip the size to insure we
990			 * don't request past the end of the underlying file
991			 */
992		        size = PAGE_SIZE * MAX_UPL_TRANSFER;
993		        base_offset = offset & ~((signed)(size - 1));
994
995			if ((base_offset + size) > vnode_size)
996			        size = round_page(((vm_size_t)(vnode_size - base_offset)));
997		} else {
998		        /*
999			 * we've been requested to page out a page beyond the current
1000			 * end of the 'file'... don't try to cluster in this case...
1001			 * we still need to send this page through because it might
1002			 * be marked precious and the underlying filesystem may need
1003			 * to do something with it (besides page it out)...
1004			 */
1005		        base_offset = offset;
1006			size = PAGE_SIZE;
1007		}
1008		assert((upl_size_t) size == size);
1009	        vnode_pageout(vnode_object->vnode_handle,
1010			      NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL);
1011	}
1012}
1013
1014
1015/*
1016 *
1017 */
1018kern_return_t
1019vnode_pager_cluster_read(
1020	vnode_pager_t		vnode_object,
1021	vm_object_offset_t	base_offset,
1022	vm_object_offset_t	offset,
1023	uint32_t		io_streaming,
1024	vm_size_t		cnt)
1025{
1026	int		local_error = 0;
1027	int		kret;
1028	int		flags = 0;
1029
1030	assert(! (cnt & PAGE_MASK));
1031
1032	if (io_streaming)
1033		flags |= UPL_IOSTREAMING;
1034
1035	assert((upl_size_t) cnt == cnt);
1036	kret = vnode_pagein(vnode_object->vnode_handle,
1037			    (upl_t) NULL,
1038			    (upl_offset_t) (offset - base_offset),
1039			    base_offset,
1040			    (upl_size_t) cnt,
1041			    flags,
1042			    &local_error);
1043/*
1044	if(kret == PAGER_ABSENT) {
1045	Need to work out the defs here, 1 corresponds to PAGER_ABSENT
1046	defined in bsd/vm/vm_pager.h  However, we should not be including
1047	that file here it is a layering violation.
1048*/
1049	if (kret == 1) {
1050		int	uplflags;
1051		upl_t	upl = NULL;
1052		unsigned int	count = 0;
1053		kern_return_t	kr;
1054
1055		uplflags = (UPL_NO_SYNC |
1056			    UPL_CLEAN_IN_PLACE |
1057			    UPL_SET_INTERNAL);
1058		count = 0;
1059		assert((upl_size_t) cnt == cnt);
1060		kr = memory_object_upl_request(vnode_object->control_handle,
1061					       base_offset, (upl_size_t) cnt,
1062					       &upl, NULL, &count, uplflags);
1063		if (kr == KERN_SUCCESS) {
1064			upl_abort(upl, 0);
1065			upl_deallocate(upl);
1066		} else {
1067			/*
1068			 * We couldn't gather the page list, probably
1069			 * because the memory object doesn't have a link
1070			 * to a VM object anymore (forced unmount, for
1071			 * example).  Just return an error to the vm_fault()
1072			 * path and let it handle it.
1073			 */
1074		}
1075
1076		return KERN_FAILURE;
1077	}
1078
1079	return KERN_SUCCESS;
1080
1081}
1082
1083
1084/*
1085 *
1086 */
1087void
1088vnode_pager_release_from_cache(
1089		int	*cnt)
1090{
1091	memory_object_free_from_cache(
1092			&realhost, &vnode_pager_ops, cnt);
1093}
1094
1095/*
1096 *
1097 */
1098vnode_pager_t
1099vnode_object_create(
1100        struct vnode *vp)
1101{
1102	register vnode_pager_t  vnode_object;
1103
1104	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
1105	if (vnode_object == VNODE_PAGER_NULL)
1106		return(VNODE_PAGER_NULL);
1107
1108	/*
1109	 * The vm_map call takes both named entry ports and raw memory
1110	 * objects in the same parameter.  We need to make sure that
1111	 * vm_map does not see this object as a named entry port.  So,
1112	 * we reserve the first word in the object for a fake ip_kotype
1113	 * setting - that will tell vm_map to use it as a memory object.
1114	 */
1115	vnode_object->pager_ops = &vnode_pager_ops;
1116	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
1117	vnode_object->ref_count = 1;
1118	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
1119	vnode_object->vnode_handle = vp;
1120
1121	return(vnode_object);
1122}
1123
1124/*
1125 *
1126 */
1127vnode_pager_t
1128vnode_pager_lookup(
1129	memory_object_t	 name)
1130{
1131	vnode_pager_t	vnode_object;
1132
1133	vnode_object = (vnode_pager_t)name;
1134	assert(vnode_object->pager_ops == &vnode_pager_ops);
1135	return (vnode_object);
1136}
1137
1138
1139/*********************** proc_info implementation *************/
1140
1141#include <sys/bsdtask_info.h>
1142
/*
 * Helper for fill_procregioninfo(), which calls it for map entries that
 * are not submaps after presetting *vnodeaddr to 0.
 */
static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
1144
1145
1146int
1147fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t  *vid)
1148{
1149
1150	vm_map_t map;
1151	vm_map_offset_t	address = (vm_map_offset_t )arg;
1152	vm_map_entry_t		tmp_entry;
1153	vm_map_entry_t		entry;
1154	vm_map_offset_t		start;
1155	vm_region_extended_info_data_t extended;
1156	vm_region_top_info_data_t top;
1157
1158	    task_lock(task);
1159	    map = task->map;
1160	    if (map == VM_MAP_NULL)
1161	    {
1162			task_unlock(task);
1163			return(0);
1164	    }
1165	    vm_map_reference(map);
1166	    task_unlock(task);
1167
1168	    vm_map_lock_read(map);
1169
1170	    start = address;
1171	    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
1172		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
1173			vm_map_unlock_read(map);
1174	    		vm_map_deallocate(map);
1175		   	return(0);
1176		}
1177	    } else {
1178		entry = tmp_entry;
1179	    }
1180
1181	    start = entry->vme_start;
1182
1183	    pinfo->pri_offset = entry->offset;
1184	    pinfo->pri_protection = entry->protection;
1185	    pinfo->pri_max_protection = entry->max_protection;
1186	    pinfo->pri_inheritance = entry->inheritance;
1187	    pinfo->pri_behavior = entry->behavior;
1188	    pinfo->pri_user_wired_count = entry->user_wired_count;
1189	    pinfo->pri_user_tag = entry->alias;
1190
1191	    if (entry->is_sub_map) {
1192		pinfo->pri_flags |= PROC_REGION_SUBMAP;
1193	    } else {
1194		if (entry->is_shared)
1195			pinfo->pri_flags |= PROC_REGION_SHARED;
1196	    }
1197
1198
1199	    extended.protection = entry->protection;
1200	    extended.user_tag = entry->alias;
1201	    extended.pages_resident = 0;
1202	    extended.pages_swapped_out = 0;
1203	    extended.pages_shared_now_private = 0;
1204	    extended.pages_dirtied = 0;
1205	    extended.external_pager = 0;
1206	    extended.shadow_depth = 0;
1207
1208	    vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);
1209
1210	    if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
1211	            extended.share_mode = SM_PRIVATE;
1212
1213	    top.private_pages_resident = 0;
1214	    top.shared_pages_resident = 0;
1215	    vm_map_region_top_walk(entry, &top);
1216
1217
1218	    pinfo->pri_pages_resident = extended.pages_resident;
1219	    pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
1220	    pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
1221	    pinfo->pri_pages_dirtied = extended.pages_dirtied;
1222	    pinfo->pri_ref_count = extended.ref_count;
1223	    pinfo->pri_shadow_depth = extended.shadow_depth;
1224	    pinfo->pri_share_mode = extended.share_mode;
1225
1226	    pinfo->pri_private_pages_resident = top.private_pages_resident;
1227	    pinfo->pri_shared_pages_resident = top.shared_pages_resident;
1228	    pinfo->pri_obj_id = top.obj_id;
1229
1230	    pinfo->pri_address = (uint64_t)start;
1231	    pinfo->pri_size = (uint64_t)(entry->vme_end - start);
1232	    pinfo->pri_depth = 0;
1233
1234	    if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
1235		*vnodeaddr = (uintptr_t)0;
1236
1237		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
1238			vm_map_unlock_read(map);
1239	    		vm_map_deallocate(map);
1240			return(1);
1241		}
1242	    }
1243
1244	    vm_map_unlock_read(map);
1245	    vm_map_deallocate(map);
1246	    return(1);
1247}
1248
1249static int
1250fill_vnodeinfoforaddr(
1251	vm_map_entry_t			entry,
1252	uintptr_t * vnodeaddr,
1253	uint32_t * vid)
1254{
1255	vm_object_t	top_object, object;
1256	memory_object_t memory_object;
1257	memory_object_pager_ops_t pager_ops;
1258	kern_return_t	kr;
1259	int		shadow_depth;
1260
1261
1262	if (entry->is_sub_map) {
1263		return(0);
1264	} else {
1265		/*
1266		 * The last object in the shadow chain has the
1267		 * relevant pager information.
1268		 */
1269		top_object = entry->object.vm_object;
1270		if (top_object == VM_OBJECT_NULL) {
1271			object = VM_OBJECT_NULL;
1272			shadow_depth = 0;
1273		} else {
1274			vm_object_lock(top_object);
1275			for (object = top_object, shadow_depth = 0;
1276			     object->shadow != VM_OBJECT_NULL;
1277			     object = object->shadow, shadow_depth++) {
1278				vm_object_lock(object->shadow);
1279				vm_object_unlock(object);
1280			}
1281		}
1282	}
1283
1284	if (object == VM_OBJECT_NULL) {
1285		return(0);
1286	} else if (object->internal) {
1287		vm_object_unlock(object);
1288		return(0);
1289	} else if (! object->pager_ready ||
1290		   object->terminating ||
1291		   ! object->alive) {
1292		vm_object_unlock(object);
1293		return(0);
1294	} else {
1295		memory_object = object->pager;
1296		pager_ops = memory_object->mo_pager_ops;
1297		if (pager_ops == &vnode_pager_ops) {
1298			kr = vnode_pager_get_object_vnode(
1299				memory_object,
1300				vnodeaddr, vid);
1301			if (kr != KERN_SUCCESS) {
1302				vm_object_unlock(object);
1303				return(0);
1304			}
1305		} else {
1306			vm_object_unlock(object);
1307			return(0);
1308		}
1309	}
1310	vm_object_unlock(object);
1311	return(1);
1312}
1313
1314kern_return_t
1315vnode_pager_get_object_vnode (
1316	memory_object_t		mem_obj,
1317	uintptr_t * vnodeaddr,
1318	uint32_t * vid)
1319{
1320	vnode_pager_t	vnode_object;
1321
1322	vnode_object = vnode_pager_lookup(mem_obj);
1323	if (vnode_object->vnode_handle)  {
1324		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
1325		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);
1326
1327		return(KERN_SUCCESS);
1328	}
1329
1330	return(KERN_FAILURE);
1331}
1332
1333
1334/*
1335 * Find the underlying vnode object for the given vm_map_entry.  If found, return with the
1336 * object locked, otherwise return NULL with nothing locked.
1337 */
1338
1339vm_object_t
1340find_vnode_object(
1341	vm_map_entry_t	entry
1342)
1343{
1344	vm_object_t			top_object, object;
1345	memory_object_t 		memory_object;
1346	memory_object_pager_ops_t	pager_ops;
1347
1348	if (!entry->is_sub_map) {
1349
1350		/*
1351		 * The last object in the shadow chain has the
1352		 * relevant pager information.
1353		 */
1354
1355		top_object = entry->object.vm_object;
1356
1357		if (top_object) {
1358			vm_object_lock(top_object);
1359
1360			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
1361				vm_object_lock(object->shadow);
1362				vm_object_unlock(object);
1363			}
1364
1365			if (object && !object->internal && object->pager_ready && !object->terminating &&
1366			    object->alive) {
1367				memory_object = object->pager;
1368				pager_ops = memory_object->mo_pager_ops;
1369
1370				/*
1371				 * If this object points to the vnode_pager_ops, then we found what we're
1372				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
1373				 * vnode and so we fall through to the bottom and return NULL.
1374				 */
1375
1376				if (pager_ops == &vnode_pager_ops)
1377					return object;		/* we return with the object locked */
1378			}
1379
1380			vm_object_unlock(object);
1381		}
1382
1383	}
1384
1385	return(VM_OBJECT_NULL);
1386}
1387