/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 *	Default Pager.
 *		Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(dp_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}


__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */
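
/*
 * For reference (a descriptive sketch only, not new mechanism): the
 * request handlers below bracket their work with these primitives
 * roughly as follows -- see dp_memory_object_data_request and
 * dp_memory_object_data_initialize for the real sequences:
 *
 *	vs_lookup(mem_obj, vs);
 *	vs_lock(vs);		wait for this request's turn in seqno order
 *	vs_start_read(vs);	or vs_start_write(vs)
 *	vs_unlock(vs);		bump the seqno, let the next request run
 *	... perform the paging operation ...
 *	vs_finish_read(vs);	or vs_finish_write(vs)
 */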

static unsigned int	default_pager_total = 0;		/* debugging */
static unsigned int	default_pager_wait_seqno = 0;		/* debugging */
static unsigned int	default_pager_wait_read = 0;		/* debugging */
static unsigned int	default_pager_wait_write = 0;		/* debugging */

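/*
 * Wait for any pending asynchronous writes on this vstruct to drain
 * (vs_async_pending is raised around async cluster writes, e.g. in
 * dp_memory_object_data_return).  Entered and exited with the VS lock
 * held; the lock is dropped and re-taken around the block.
 */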
__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{

	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}


#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	 by a single thread if the receiver is multithreaded and
 *	 the interfaces are asynchronous (i.e. sender can generate
 *	 more than one request before the first is received in the
 *	 pager).  Normally, IPC would generate these numbers in that
 *	 case.  But we are trying to avoid using IPC for the in-kernel
 *	 scenario.  Since these are actually invoked synchronously
 *	 anyway (in-kernel), we can just fake the sequence number
 *	 generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t		vs)
{
	mach_port_seqno_t	seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/* This is to be used for the transfer from segment code ONLY.  The   */
/* transfer code holds off vs destruction by keeping the              */
/* vs_async_wait count non-zero.  It will not conflict with           */
/* other writers on an async basis because it only writes on          */
/* a cluster basis into fresh (as of sync time) cluster locations.    */

__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}


/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */

vstruct_t
vs_object_create(
	dp_size_t size)
{
	vstruct_t	vs;

	/*
	 * Allocate a vstruct. If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t		mem_obj = vs->vs_mem_obj;
	mach_port_t		pset;
	mach_port_mscount_t 	sync;
	mach_port_t		previous;
	kern_return_t		kr;
	static char		here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ip_lock(mem_obj);  /* unlocked in nsrequest below */
	ipc_port_make_sonce_locked(mem_obj);
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

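/*
 * Pager operations vector handed back to the VM layer via vs_pager_ops.
 * Note: this is a positional initializer, so the entries must remain in
 * the order in which the fields are declared in
 * struct memory_object_pager_ops.
 */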
const struct memory_object_pager_ops default_pager_ops = {
	dp_memory_object_reference,
	dp_memory_object_deallocate,
	dp_memory_object_init,
	dp_memory_object_terminate,
	dp_memory_object_data_request,
	dp_memory_object_data_return,
	dp_memory_object_data_initialize,
	dp_memory_object_data_unlock,
	dp_memory_object_synchronize,
	dp_memory_object_map,
	dp_memory_object_last_unmap,
	dp_memory_object_data_reclaim,
	"default pager"
};

kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	__unused memory_object_cluster_size_t pager_page_size)
{
	vstruct_t		vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t		length,
	__unused vm_sync_t		flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_map(
	__unused memory_object_t	mem_obj,
	__unused vm_prot_t		prot)
{
	panic("dp_memory_object_map");
	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_last_unmap(
	__unused memory_object_t	mem_obj)
{
	panic("dp_memory_object_last_unmap");
	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_data_reclaim(
	memory_object_t		mem_obj,
	boolean_t		reclaim_backing_store)
{
	vstruct_t		vs;
	kern_return_t		retval;

	vs_lookup(mem_obj, vs);
	for (;;) {
		vs_lock(vs);
		vs_async_wait(vs);
		if (!vs->vs_xfer_pending) {
			break;
		}
	}
	vs->vs_xfer_pending = TRUE;
	vs_unlock(vs);

	retval = ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store);

	vs_lock(vs);
	vs->vs_xfer_pending = FALSE;
	vs_unlock(vs);

	return retval;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t		mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* A bit of special-case ugliness here: wake up any waiting reads. */
	/* These data requests had to be removed from the seqno traffic    */
	/* because of a performance bottleneck with large memory objects;  */
	/* the problem will right itself with the new component-based      */
	/* synchronous interface.  The new async interface will be able to */
	/* return failure during its sync phase.  In the meantime ...      */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

void
dp_memory_object_reference(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

void
dp_memory_object_deallocate(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

	/* get VSL_LOCK out of order by using TRY mechanism */
	while(!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}


	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if(backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}

kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t		length,
	__unused vm_prot_t	protection_required,
	memory_object_fault_info_t	fault_info)
{
	vstruct_t		vs;
	kern_return_t		kr = KERN_SUCCESS;

	GSTAT(global_stats.gs_pagein_calls++);


	/* CDY: at this moment vs_lookup panics when presented with the wrong */
	/* port.  As we are expanding this pager to support user interfaces,  */
	/* this should be changed to return KERN_FAILURE.		       */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance  */
	/* reasons.  We can do this because we know that the read and write  */
	/* threads are different, and we rely on synchronization of read and */
	/* write requests at the cache memory_object level.  This breaks out */
	/* wait_for_writers; all of this goes away when we get real control  */
	/* of seqno with the new component interface.			      */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);  /* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	assert((dp_offset_t) offset == offset);
	kr = pvs_cluster_read(vs, (dp_offset_t) offset, length, fault_info);

	/* Regular data requests have a non-zero length and always return KERN_SUCCESS.
	   Their actual success is determined by whether they provide a page or not,
	   i.e. whether we call upl_commit() or upl_abort().  A length of 0 means that
	   the caller is only asking if the pager has a copy of that page or not.  The
	   answer to that question is provided by the return value: KERN_SUCCESS means
	   that the pager does have that page.
	*/
	if(length) {
		kr = KERN_SUCCESS;
	}

	vs_finish_read(vs);

	return kr;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant  */
/* and what is not in data sent from the kernel.  Data initialize has been   */
/* changed to copy back all data sent to it, in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages   */
/* to write back.  As of the writing of this note, this is indeed the case:  */
/* the kernel writes back one page at a time through this interface.	      */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t		size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t		size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}


/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t			size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int		*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	__unused int	upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* must not be blocked by read activities, so the vs lock must be    */
	/* taken on a try rather than a blocking basis.  The code below      */
	/* relies on the fact that the interface is synchronous.  Should     */
	/* this interface again become async for some type of pager in the   */
	/* future, the pages will have to be returned through a separate,    */
	/* asynchronous path.						      */

	vs_lookup(mem_obj, vs);

	default_pager_total++;

	/* might be unreachable if VS_TRY_LOCK is, by definition, always true */
	__unreachable_ok_push
	if(!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		unsigned int	page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}
	__unreachable_ok_pop

	if ((vs->vs_seqno != vs->vs_next_seqno++)
			|| (vs->vs_readers)
			|| (vs->vs_xfer_pending)) {
		upl_t		upl;
		unsigned int	page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
				(memory_object_offset_t)offset,
				size, size,
				&upl, NULL, &page_list_count,
				UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);


	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t) offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}


	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t		vs;

	assert(dmm == default_pager_object);

	if ((dp_size_t) new_size != new_size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_pager_ops = &default_pager_ops;
	vs->vs_pager_header.io_bits = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t default_pager,
	vm_size_t	size,
	memory_object_t	*mem_objp)
{
	vstruct_t	vs;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	if ((dp_size_t) size != size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_pager_ops = &default_pager_ops;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

kern_return_t
default_pager_objects(
	default_pager_t			default_pager,
	default_pager_object_array_t	*objectsp,
	mach_msg_type_number_t		*ocountp,
	mach_port_array_t		*portsp,
	mach_msg_type_number_t		*pcountp)
{
	vm_offset_t		oaddr = 0;	/* memory for objects */
	vm_size_t		osize = 0;	/* current size */
	default_pager_object_t	* objects;
	unsigned int		opotential = 0;

	vm_map_copy_t		pcopy = 0;	/* copy handle for pagers */
	vm_size_t		psize = 0;	/* current size */
	memory_object_t		* pagers;
	unsigned int		ppotential = 0;

	unsigned int		actual;
	unsigned int		num_objects;
	kern_return_t		kr;
	vstruct_t		entry;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;

	/*
	 * Our out-of-line port arrays are simply kalloc'ed.
	 */
	psize = vm_map_round_page(actual * sizeof (*pagers),
				  vm_map_page_mask(ipc_kernel_map));
	ppotential = (unsigned int) (psize / sizeof (*pagers));
	pagers = (memory_object_t *)kalloc(psize);
	if (0 == pagers)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Returned out-of-line data must be allocated out of
	 * the ipc_kernel_map, wired down, filled in, and
	 * then "copied in" as if it had been sent by a
	 * user process.
	 */
	osize = vm_map_round_page(actual * sizeof (*objects),
				  vm_map_page_mask(ipc_kernel_map));
	opotential = (unsigned int) (osize / sizeof (*objects));
	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	if (KERN_SUCCESS != kr) {
		kfree(pagers, psize);
		return KERN_RESOURCE_SHORTAGE;
	}
	objects = (default_pager_object_t *)oaddr;


	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t			pager;
		vm_size_t			size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		VS_LOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		pager = vs_to_mem_obj(entry);
		dp_memory_object_reference(pager);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers [num_objects++] = pager;
		continue;

	    not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/* clear out any excess allocation */
	while (num_objects < opotential) {
		objects[--opotential].dpo_object = (vm_offset_t) 0;
		objects[opotential].dpo_size = 0;
	}
	while (num_objects < ppotential) {
		pagers[--ppotential] = MEMORY_OBJECT_NULL;
	}

	kr = vm_map_unwire(ipc_kernel_map,
			   vm_map_trunc_page(oaddr,
					     vm_map_page_mask(ipc_kernel_map)),
			   vm_map_round_page(oaddr + osize,
					     vm_map_page_mask(ipc_kernel_map)),
			   FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
			   (vm_map_size_t)osize, TRUE, &pcopy);
	assert(KERN_SUCCESS == kr);

	*objectsp = (default_pager_object_array_t)objects;
	*ocountp = num_objects;
	*portsp = (mach_port_array_t)pcopy;
	*pcountp = num_objects;

	return KERN_SUCCESS;
}

kern_return_t
default_pager_object_pages(
	default_pager_t		default_pager,
	mach_port_t			memory_object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t		*countp)
{
	vm_offset_t			addr = 0; /* memory for page offsets */
	vm_size_t			size = 0; /* current memory size */
	vm_map_copy_t			copy;
	default_pager_page_t		* pages = 0;
	unsigned int			potential;
	unsigned int			actual;
	kern_return_t			kr;
	memory_object_t			object;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	object = (memory_object_t) memory_object;

	potential = 0;
	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		size = vm_map_round_page(actual * sizeof (*pages),
					 vm_map_page_mask(ipc_kernel_map));
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (KERN_SUCCESS != kr)
			return KERN_RESOURCE_SHORTAGE;

		pages = (default_pager_page_t *)addr;
		potential = (unsigned int) (size / sizeof (*pages));
	}

	/*
	 * Clear unused memory.
	 */
	while (actual < potential)
		pages[--potential].dpp_offset = 0;

	kr = vm_map_unwire(ipc_kernel_map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(ipc_kernel_map)),
			   vm_map_round_page(addr + size,
					     vm_map_page_mask(ipc_kernel_map)),
			   FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
			   (vm_map_size_t)size, TRUE, &copy);
	assert(KERN_SUCCESS == kr);


	*pagesp = (default_pager_page_array_t)copy;
	*countp = actual;
	return KERN_SUCCESS;
}