/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 *
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	NULL, /* data_reclaim */
	"vnode pager"
};

typedef struct vnode_pager {
	struct ipc_object_header	pager_header;	/* fake ip_kotype()		*/
	memory_object_pager_ops_t pager_ops;	/* == &vnode_pager_ops	     */
	unsigned int		ref_count;	/* reference count	     */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle 	     */
} *vnode_pager_t;

#define pager_ikot pager_header.io_bits
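
/*
 * The io_bits field of the fake pager_header doubles as an ip_kotype:
 * vnode_object_create() stores IKOT_MEMORY_OBJECT there so that vm_map
 * treats this structure as a memory object rather than a named entry port
 * (see the comment in vnode_object_create() below).
 */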

ipc_port_t
trigger_name_to_port(			/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(			/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(			/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT 	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE		10000


#if DEBUG
int pagerdebug=0;

#define PAGER_ALL		0xffffffff
#define	PAGER_INIT		0x00000001
#define	PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
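
/*
 * Example (DEBUG kernels only): setting pagerdebug to PAGER_ALL enables all
 * of the PAGER_DEBUG() tracing below, e.g.
 *	PAGER_DEBUG(PAGER_INIT, ("vnode_pager_init: %p\n", mem_obj));
 */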

extern int proc_resetpcontrol(int);

#if DEVELOPMENT || DEBUG
extern unsigned long vm_cs_validated_resets;
#endif

/*
 *	Routine:	mach_macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
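/*
 * Illustrative only: a suitably privileged user process might register a
 * high-water-mark alert port roughly as follows (the user-level wrapper is
 * outside this file):
 *
 *	macx_triggers(hi_water_pages, low_water_pages, HI_WAT_ALERT, alert_port);
 *
 * The flags handled below (SWAP_ENCRYPT_ON/OFF, USE_EMERGENCY_SWAP_FILE_FIRST,
 * SWAP_FILE_CREATION_ERROR, HI_WAT_ALERT, LO_WAT_ALERT, PROC_RESUME,
 * SWAP_COMPACT_ENABLE/DISABLE) select which trigger is being configured.
 */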
int
mach_macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					&default_pager, 0);
	if(kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
	    ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
		/*
		 * Time to switch to the emergency segment.
		 */
		return default_pager_triggers(default_pager,
					0, 0,
					USE_EMERGENCY_SWAP_FILE_FIRST,
					IP_NULL);
	}

	if (flags & SWAP_FILE_CREATION_ERROR) {
		/*
		 * For some reason, the dynamic pager failed to create a swap file.
		 */
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
					0, 0,
					SWAP_FILE_CREATION_ERROR,
					trigger_port);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}


	if (flags & PROC_RESUME) {

		/*
		 * For this call, hi_water is used to pass in the pid of the process we want to resume
		 * or unthrottle.  This is of course restricted to the superuser (checked inside of
		 * proc_resetpcontrol).
		 */

		return proc_resetpcontrol(hi_water);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	if (flags & HI_WAT_ALERT) {
		thread_precedence_policy_data_t		pre;
		thread_extended_policy_data_t		ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);

		current_thread()->options |= TH_OPT_VMPRIV;
	}

	if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
		return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
	}

	return 0;
}

/*
 *	Convert a trigger name (a port name in the caller's IPC space) into
 *	the corresponding receive-right port.  Returns NULL if the name does
 *	not translate; on success the port is returned locked and active.
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space  = current_space();
	if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
						&trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32

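/*
 * memory_object_control_uiomove:
 *
 * Copy data between the given uio and the resident pages of the VM object
 * backing 'control', starting at 'offset' + 'start_offset', in runs of up
 * to MAX_RUN pages at a time.  Pages are marked busy while the copy is in
 * flight; if mark_dirty is set they are dirtied (and their code-signing
 * validation state is reset), and if take_reference is set they are moved
 * to the tail of the inactive queue to approximate LRU behavior.  Returns 0
 * on success or the error from uiomove64().
 */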
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
		        cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

		        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			        break;


			if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->laundry) {
				dst_page->pageout = FALSE;

				vm_pageout_steal_laundry(dst_page, FALSE);
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

		        if (mark_dirty) {
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
					vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
		        /*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
		        break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

		        dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
			        xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
			        break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
		        dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 *
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}


/*
 *
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t      size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				PAGE_SIZE, "vnode pager structures");
	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);


#if CONFIG_CODE_DECRYPTION
	apple_protect_pager_bootstrap();
#endif	/* CONFIG_CODE_DECRYPTION */
	swapfile_pager_bootstrap();
	return;
}

/*
 *	Create a vnode pager (memory object) for the given vnode.
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 memory_object_cluster_size_t pg_size)
{
	vnode_pager_t   vnode_object;
	kern_return_t   kr;
	memory_object_attr_info_data_t  attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
        memory_object_t		mem_obj,
        memory_object_offset_t	offset,
        memory_object_cluster_size_t		data_cnt,
        memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t		dirty,
	__unused boolean_t		kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_isinuse(
	memory_object_t		mem_obj,
	uint32_t		*isinuse)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_throttle_io_limit(
	memory_object_t		mem_obj,
	uint32_t		*limit)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
	memory_object_t		mem_obj,
	boolean_t		*isSSD)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_name(
	memory_object_t		mem_obj,
	char			*pathname,
	vm_size_t		pathname_len,
	char			*filename,
	vm_size_t		filename_len,
	boolean_t		*truncated_path_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_name(vnode_object->vnode_handle,
				    pathname,
				    pathname_len,
				    filename,
				    filename_len,
				    truncated_path_p);
}

kern_return_t
vnode_pager_get_object_mtime(
	memory_object_t		mem_obj,
	struct timespec		*mtime,
	struct timespec		*cs_mtime)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_mtime(vnode_object->vnode_handle,
				     mtime,
				     cs_mtime);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t	mem_obj,
	memory_object_offset_t	offset,
        int		optype	)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 *
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	vnode_pager_t		vnode_object;
	memory_object_offset_t	base_offset;
	vm_size_t		size;
	uint32_t		io_streaming = 0;

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER * PAGE_SIZE;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
	        size = PAGE_SIZE;

	assert(offset >= base_offset &&
	       offset < base_offset + size);

	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 *
 */
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t		length,
	__unused vm_sync_t		sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_map(
	memory_object_t		mem_obj,
	vm_prot_t		prot)
{
	vnode_pager_t		vnode_object;
	int			ret;
	kern_return_t		kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

kern_return_t
vnode_pager_last_unmap(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}



/*
 *	Write the specified range of the memory object back to its vnode.
 *	For msync-driven requests (UPL_MSYNC) the entire range is pushed,
 *	optionally synchronously and with error reporting; otherwise a
 *	single cluster around 'offset' is paged out.
 */
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t   *	resid_offset,
	int		     *  io_error,
	int			upl_flags)
{
	vm_size_t	size;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

	        upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
		        upl_flags |= UPL_KEEPCACHED;

	        while (cnt) {
			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
				      NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
			        if ( (*io_error = errno) )
				        break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
	        vm_object_offset_t      vnode_size;
	        vm_object_offset_t	base_offset;

	        /*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
		        /*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
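		        /*
			 * Worked example (illustrative only; the actual
			 * constants are platform-dependent): with 4K pages
			 * and a 1 MB maximum cluster, size is 0x100000, so
			 * an offset of 0x123000 yields
			 * base_offset = 0x123000 & ~0xFFFFF = 0x100000,
			 * and the cluster is then clipped if it would extend
			 * past vnode_size.
			 */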
		        size = PAGE_SIZE * MAX_UPL_TRANSFER;
		        base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
			        size = round_page(((vm_size_t)(vnode_size - base_offset)));
		} else {
		        /*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
		        base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
	        vnode_pageout(vnode_object->vnode_handle,
			      NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL);
	}
}


/*
 *	Issue a pagein against the vnode for the cluster computed around the
 *	faulting offset: 'base_offset' is the start of the cluster, 'offset'
 *	the faulting page within it, and 'cnt' the cluster size.
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	base_offset,
	vm_object_offset_t	offset,
	uint32_t		io_streaming,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;
	int		flags = 0;

	assert(! (cnt & PAGE_MASK));

	if (io_streaming)
		flags |= UPL_IOSTREAMING;

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (upl_offset_t) (offset - base_offset),
			    base_offset,
			    (upl_size_t) cnt,
			    flags,
			    &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here; 1 corresponds to PAGER_ABSENT, as
	defined in bsd/vm/vm_pager.h.  However, we should not be including
	that file here; it is a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->control_handle,
					       base_offset, (upl_size_t) cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 *
 */
void
vnode_pager_release_from_cache(
		int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 *	Allocate and initialize a vnode_pager structure for the given vnode.
 */
vnode_pager_t
vnode_object_create(
        struct vnode *vp)
{
	register vnode_pager_t  vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 *	Convert a memory_object_t back into the vnode_pager it represents;
 *	the caller must pass a memory object that was created by this pager.
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	 name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);


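/*
 * fill_procregioninfo:
 *
 * Fill in a proc_regioninfo_internal structure for the VM map entry that
 * contains (or immediately follows) the given address in the task's map.
 * If the entry is backed by a vnode, the vnode pointer and vid are also
 * returned through 'vnodeaddr' and 'vid'.  Returns 1 on success, 0 if the
 * task has no map or the address lies beyond the last map entry.
 */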
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t  *vid)
{

	vm_map_t map;
	vm_map_offset_t	address = (vm_map_offset_t )arg;
	vm_map_entry_t		tmp_entry;
	vm_map_entry_t		entry;
	vm_map_offset_t		start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	    task_lock(task);
	    map = task->map;
	    if (map == VM_MAP_NULL)
	    {
			task_unlock(task);
			return(0);
	    }
	    vm_map_reference(map);
	    task_unlock(task);

	    vm_map_lock_read(map);

	    start = address;
	    if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
	    		vm_map_deallocate(map);
		   	return(0);
		}
	    } else {
		entry = tmp_entry;
	    }

	    start = entry->vme_start;

	    pinfo->pri_offset = entry->offset;
	    pinfo->pri_protection = entry->protection;
	    pinfo->pri_max_protection = entry->max_protection;
	    pinfo->pri_inheritance = entry->inheritance;
	    pinfo->pri_behavior = entry->behavior;
	    pinfo->pri_user_wired_count = entry->user_wired_count;
	    pinfo->pri_user_tag = entry->alias;

	    if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	    } else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	    }


	    extended.protection = entry->protection;
	    extended.user_tag = entry->alias;
	    extended.pages_resident = 0;
	    extended.pages_swapped_out = 0;
	    extended.pages_shared_now_private = 0;
	    extended.pages_dirtied = 0;
	    extended.external_pager = 0;
	    extended.shadow_depth = 0;

	    vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	    if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
	            extended.share_mode = SM_PRIVATE;

	    top.private_pages_resident = 0;
	    top.shared_pages_resident = 0;
	    vm_map_region_top_walk(entry, &top);


	    pinfo->pri_pages_resident = extended.pages_resident;
	    pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	    pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	    pinfo->pri_pages_dirtied = extended.pages_dirtied;
	    pinfo->pri_ref_count = extended.ref_count;
	    pinfo->pri_shadow_depth = extended.shadow_depth;
	    pinfo->pri_share_mode = extended.share_mode;

	    pinfo->pri_private_pages_resident = top.private_pages_resident;
	    pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	    pinfo->pri_obj_id = top.obj_id;

	    pinfo->pri_address = (uint64_t)start;
	    pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	    pinfo->pri_depth = 0;

	    if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
			vm_map_unlock_read(map);
	    		vm_map_deallocate(map);
			return(1);
		}
	    }

	    vm_map_unlock_read(map);
	    vm_map_deallocate(map);
	    return(1);
}

static int
fill_vnodeinfoforaddr(
	vm_map_entry_t			entry,
	uintptr_t * vnodeaddr,
	uint32_t * vid)
{
	vm_object_t	top_object, object;
	memory_object_t memory_object;
	memory_object_pager_ops_t pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode (
	memory_object_t		mem_obj,
	uintptr_t * vnodeaddr,
	uint32_t * vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle)  {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}


/*
 * Find the underlying vnode object for the given vm_map_entry.  If found, return with the
 * object locked, otherwise return NULL with nothing locked.
 */

vm_object_t
find_vnode_object(
	vm_map_entry_t	entry
)
{
	vm_object_t			top_object, object;
	memory_object_t 		memory_object;
	memory_object_pager_ops_t	pager_ops;

	if (!entry->is_sub_map) {

		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = entry->object.vm_object;

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object && !object->internal && object->pager_ready && !object->terminating &&
			    object->alive) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return NULL.
				 */

				if (pager_ops == &vnode_pager_ops)
					return object;		/* we return with the object locked */
			}

			vm_object_unlock(object);
		}

	}

	return(VM_OBJECT_NULL);
}