/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/pmap.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>


/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv_lock_table.  The lock bits are accessed by the physical
 *	address of the page they lock.
 */
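
/*
 * Illustrative sketch (added commentary, not original code): the access
 * pattern used throughout this file for a pv_head_table / physical
 * attribute entry is to convert the page number to an attribute index
 * and take the per-page lock around the update:
 *
 *	int pai = ppn_to_pai(pn);
 *	if (IS_MANAGED_PAGE(pai)) {
 *		LOCK_PVH(pai);
 *		... inspect or modify pmap_phys_attributes[pai] ...
 *		UNLOCK_PVH(pai);
 *	}
 */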

char	*pv_lock_table;		/* pointer to array of bits */
char	*pv_hash_lock_table;

pv_rooted_entry_t	pv_head_table;		/* array of entries, one per
						 * page */
uint32_t		pv_hashed_free_count = 0;
uint32_t		pv_hashed_kern_free_count = 0;

pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
uint32_t pmap_pagetable_corruption_incidents;
uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
uint64_t pmap_pagetable_corruption_interval_abstime;
thread_call_t	pmap_pagetable_corruption_log_call;
static thread_call_data_t	pmap_pagetable_corruption_log_call_data;
boolean_t pmap_pagetable_corruption_timeout = FALSE;

volatile uint32_t	mappingrecurse = 0;

uint32_t  pv_hashed_low_water_mark, pv_hashed_kern_low_water_mark, pv_hashed_alloc_chunk, pv_hashed_kern_alloc_chunk;

thread_t mapping_replenish_thread;
event_t	mapping_replenish_event, pmap_user_pv_throttle_event;

uint64_t pmap_pv_throttle_stat, pmap_pv_throttled_waiters;

unsigned int pmap_cache_attributes(ppnum_t pn) {
	if (pmap_get_cache_attributes(pn) & INTEL_PTE_NCACHE)
		return (VM_WIMG_IO);
	else
		return (VM_WIMG_COPYBACK);
}

void	pmap_set_cache_attributes(ppnum_t pn, unsigned int cacheattr) {
	unsigned int current, template = 0;
	int pai;

	if (cacheattr & VM_MEM_NOT_CACHEABLE) {
		if (!(cacheattr & VM_MEM_GUARDED))
			template |= PHYS_PTA;
		template |= PHYS_NCACHE;
	}

	pmap_intr_assert();

	assert((pn != vm_page_fictitious_addr) && (pn != vm_page_guard_addr));

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		return;
	}

	/* Override the cache attributes for this physical page.
	 * This does not walk existing mappings to adjust them;
	 * the page is assumed to be disconnected.
	 */

	LOCK_PVH(pai);

	pmap_update_cache_attributes_locked(pn, template);

	current = pmap_phys_attributes[pai] & PHYS_CACHEABILITY_MASK;
	pmap_phys_attributes[pai] &= ~PHYS_CACHEABILITY_MASK;
	pmap_phys_attributes[pai] |= template;

	UNLOCK_PVH(pai);

	if ((template & PHYS_NCACHE) && !(current & PHYS_NCACHE)) {
		pmap_sync_page_attributes_phys(pn);
	}
}
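
/*
 * Illustrative usage sketch (added, not original code): because
 * pmap_set_cache_attributes() assumes the page has already been
 * disconnected, a caller would typically remove all mappings first,
 * e.g.:
 *
 *	pmap_disconnect(pn);
 *	pmap_set_cache_attributes(pn, VM_MEM_NOT_CACHEABLE);
 *
 * Both routines are defined in this file; the exact call site in the
 * VM layer is not shown here.
 */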

unsigned	pmap_get_cache_attributes(ppnum_t pn) {
	if (last_managed_page == 0)
		return 0;

	if (!IS_MANAGED_PAGE(ppn_to_pai(pn))) {
		return INTEL_PTE_NCACHE;
	}

	/*
	 * The cache attributes are read locklessly for efficiency.
	 */
	unsigned int attr = pmap_phys_attributes[ppn_to_pai(pn)];
	unsigned int template = 0;

	if (attr & PHYS_PTA)
		template |= INTEL_PTE_PTA;
	if (attr & PHYS_NCACHE)
		template |= INTEL_PTE_NCACHE;
	return template;
}



boolean_t
pmap_is_noencrypt(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai))
		return (FALSE);

	if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT)
		return (TRUE);

	return (FALSE);
}


void
pmap_set_noencrypt(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);

		pmap_phys_attributes[pai] |= PHYS_NOENCRYPT;

		UNLOCK_PVH(pai);
	}
}


void
pmap_clear_noencrypt(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		/*
		 * Synchronization at the VM layer prevents PHYS_NOENCRYPT
		 * from changing state, so the lock is not needed to inspect it.
		 */
		if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT) {
			LOCK_PVH(pai);

			pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;

			UNLOCK_PVH(pai);
		}
	}
}

void
compute_pmap_gc_throttle(void *arg __unused)
{

}


void
pmap_lock_phys_page(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
	} else
		simple_lock(&phys_backup_lock);
}


void
pmap_unlock_phys_page(ppnum_t pn)
{
	int		pai;

	pai = ppn_to_pai(pn);

	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	} else
		simple_unlock(&phys_backup_lock);
}



__private_extern__ void
pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
	if (pmap_pagetable_corruption_incidents > 0) {
		int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
		(*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
		for (i = 0; i < e; i++) {
			(*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident, pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
		}
	}
}

static inline void
pmap_pagetable_corruption_log_setup(void) {
	if (pmap_pagetable_corruption_log_call == NULL) {
		nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
		thread_call_setup(&pmap_pagetable_corruption_log_call_data,
		    (thread_call_func_t) pmap_pagetable_corruption_msg_log,
		    (thread_call_param_t) &printf);
		pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
	}
}

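/*
 * Illustrative sketch (added commentary, not original code): once the
 * thread_call above is set up, the corruption-recording path is
 * presumed to arm it with something along the lines of:
 *
 *	uint64_t deadline;
 *	clock_absolutetime_interval_to_deadline(pmap_pagetable_corruption_interval_abstime, &deadline);
 *	thread_call_enter_delayed(pmap_pagetable_corruption_log_call, deadline);
 *
 * The actual trigger lives outside this file, so the exact sequence
 * shown here is an assumption.
 */

/*
 * mapping_free_prime(): pre-populate the user and kernel pv_hashed free
 * lists at boot so early pmap activity does not have to wait on the
 * replenish thread, and size the low-water marks from DRAM size.
 */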
void
mapping_free_prime(void)
{
	unsigned		i;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh;
	pv_hashed_entry_t	pvh_et;
	int			pv_cnt;

	/* Scale based on DRAM size */
	pv_hashed_low_water_mark = MAX(PV_HASHED_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 2000);
	pv_hashed_low_water_mark = MIN(pv_hashed_low_water_mark, 16000);
	/* Alterable via sysctl */
	pv_hashed_kern_low_water_mark = MAX(PV_HASHED_KERN_LOW_WATER_MARK_DEFAULT, ((uint32_t)(sane_size >> 30)) * 1000);
	pv_hashed_kern_low_water_mark = MIN(pv_hashed_kern_low_water_mark, 16000);
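
	/*
	 * Added illustration (not original): for an assumed machine with
	 * 8 GiB of DRAM, sane_size >> 30 == 8, so the user low-water mark
	 * computes as MAX(default, 8 * 2000) = 16000, then capped at
	 * MIN(16000, 16000) = 16000; the kernel mark computes as
	 * MAX(default, 8 * 1000) = 8000 (assuming the compile-time
	 * defaults are smaller than these products).
	 */
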
	pv_hashed_kern_alloc_chunk = PV_HASHED_KERN_ALLOC_CHUNK_INITIAL;
	pv_hashed_alloc_chunk = PV_HASHED_ALLOC_CHUNK_INITIAL;

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK_INITIAL); i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK_INITIAL; i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
}

void mapping_replenish(void);

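/*
 * mapping_adjust(): one-time setup, invoked from the VM startup path
 * (the exact call site is outside this file): arm the pagetable
 * corruption log and spawn the mapping_replenish thread that keeps the
 * pv_hashed free lists topped up.
 */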
void mapping_adjust(void) {
	kern_return_t mres;

	pmap_pagetable_corruption_log_setup();

	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
	if (mres != KERN_SUCCESS) {
		panic("pmap: mapping_replenish_thread creation failed");
	}
	thread_deallocate(mapping_replenish_thread);
}

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_kernel_reserve_replenish_stat;
unsigned pmap_user_reserve_replenish_stat;
unsigned pmap_kern_reserve_alloc_stat;

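/*
 * mapping_replenish(): continuation for the thread created above.  It
 * loops forever, refilling first the kernel and then the user pv_hashed
 * free lists whenever they drop below their low-water marks, waking any
 * throttled waiters, and then blocking on mapping_replenish_event until
 * it is needed again.
 */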
void mapping_replenish(void)
{
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh;
	pv_hashed_entry_t	pvh_et;
	int			pv_cnt;
	unsigned		i;

	/* We qualify for VM privileges...*/
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {

		while (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark) {
			pv_cnt = 0;
			pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

			for (i = 0; i < pv_hashed_kern_alloc_chunk; i++) {
				pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
				pvh_e->qlink.next = (queue_entry_t)pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pv_cnt++;
			}
			pmap_kernel_reserve_replenish_stat += pv_cnt;
			PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
		}

		pv_cnt = 0;
		pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;

		if (pv_hashed_free_count < pv_hashed_low_water_mark) {
			for (i = 0; i < pv_hashed_alloc_chunk; i++) {
				pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

				pvh_e->qlink.next = (queue_entry_t)pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pv_cnt++;
			}
			pmap_user_reserve_replenish_stat += pv_cnt;
			PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
		}
		/* Wake threads throttled while the kernel reserve was being
		 * replenished.
		 */
		if (pmap_pv_throttled_waiters) {
			pmap_pv_throttled_waiters = 0;
			thread_wakeup(&pmap_user_pv_throttle_event);
		}
		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
		if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark)
			continue;
		/* Block sans continuation to avoid yielding kernel stack */
		assert_wait(&mapping_replenish_event, THREAD_UNINT);
		mappingrecurse = 0;
		thread_block(THREAD_CONTINUE_NULL);
		pmap_mapping_thread_wakeups++;
	}
}
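
/*
 * Added note (an assumption about code outside this file): consumers of
 * pv_hashed entries are expected to wake this thread when a free list
 * drops below its low-water mark, using mappingrecurse to avoid
 * duplicate wakeups, roughly:
 *
 *	if (pv_hashed_kern_free_count < pv_hashed_kern_low_water_mark &&
 *	    hw_compare_and_store(0, 1, &mappingrecurse))
 *		thread_wakeup(&mapping_replenish_event);
 *
 * The real check lives in the pmap allocation macros and may differ in
 * detail.
 */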

/*
 *	Set specified attribute bits.
 */

void
phys_attribute_set(
	ppnum_t		pn,
	int		bits)
{
	int		pai;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/* Not a managed page.  */
		return;
	}

	LOCK_PVH(pai);
	pmap_phys_attributes[pai] |= bits;
	UNLOCK_PVH(pai);
}

/*
 *	Set the modify bit on the specified physical page.
 */

void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */

void
pmap_clear_modify(ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */

boolean_t
pmap_is_modified(ppnum_t pn)
{
	if (phys_attribute_test(pn, PHYS_MODIFIED))
		return TRUE;
	return FALSE;
}


/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */

void
pmap_clear_reference(ppnum_t pn)
{
	phys_attribute_clear(pn, PHYS_REFERENCED, 0, NULL);
}

void
pmap_set_reference(ppnum_t pn)
{
	phys_attribute_set(pn, PHYS_REFERENCED);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */

boolean_t
pmap_is_referenced(ppnum_t pn)
{
	if (phys_attribute_test(pn, PHYS_REFERENCED))
		return TRUE;
	return FALSE;
}


/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(ppnum_t pn)
{
	int		refmod;
	unsigned int	retval = 0;

	refmod = phys_attribute_test(pn, PHYS_MODIFIED | PHYS_REFERENCED);

	if (refmod & PHYS_MODIFIED)
		retval |= VM_MEM_MODIFIED;
	if (refmod & PHYS_REFERENCED)
		retval |= VM_MEM_REFERENCED;

	return (retval);
}
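
/*
 * Illustrative usage sketch (added, not original code): a pageout-style
 * caller can read the generic bits and then clear them with the
 * matching mask, using only routines defined in this file:
 *
 *	unsigned int refmod = pmap_get_refmod(pn);
 *	if (refmod & VM_MEM_MODIFIED) {
 *		... schedule the page for cleaning ...
 *		pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
 *	}
 *
 * The surrounding policy is hypothetical; only the pmap calls are real.
 */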


void
pmap_clear_refmod_options(ppnum_t pn, unsigned int mask, unsigned int options, void *arg)
{
	unsigned int  x86Mask;

	x86Mask = (  ((mask & VM_MEM_MODIFIED)   ? PHYS_MODIFIED   : 0)
	           | ((mask & VM_MEM_REFERENCED) ? PHYS_REFERENCED : 0));

	phys_attribute_clear(pn, x86Mask, options, arg);
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod(ppnum_t pn, unsigned int mask)
{
	unsigned int  x86Mask;

	x86Mask = (  ((mask & VM_MEM_MODIFIED)   ? PHYS_MODIFIED   : 0)
	           | ((mask & VM_MEM_REFERENCED) ? PHYS_REFERENCED : 0));

	phys_attribute_clear(pn, x86Mask, 0, NULL);
}

unsigned int
pmap_disconnect(ppnum_t pa)
{
	return (pmap_disconnect_options(pa, 0, NULL));
}

/*
 *	Routine:
 *		pmap_disconnect_options
 *
 *	Function:
 *		Disconnect all mappings for this page and return reference and
 *		change status in generic format.
 *
 */
unsigned int
pmap_disconnect_options(ppnum_t pa, unsigned int options, void *arg)
{
	unsigned refmod, vmrefmod = 0;

	pmap_page_protect_options(pa, 0, options, arg);		/* disconnect the page */

	pmap_assert(pa != vm_page_fictitious_addr);
	if ((pa == vm_page_guard_addr) || !IS_MANAGED_PAGE(pa) || (options & PMAP_OPTIONS_NOREFMOD))
		return 0;
	refmod = pmap_phys_attributes[pa] & (PHYS_MODIFIED | PHYS_REFERENCED);

	if (refmod & PHYS_MODIFIED)
		vmrefmod |= VM_MEM_MODIFIED;
	if (refmod & PHYS_REFERENCED)
		vmrefmod |= VM_MEM_REFERENCED;

	return vmrefmod;
}
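
/*
 * Illustrative usage sketch (added, not original code): a caller that
 * wants to unmap a page everywhere while preserving its dirty state
 * could do:
 *
 *	unsigned int refmod = pmap_disconnect(pn);
 *	boolean_t dirty = (refmod & VM_MEM_MODIFIED) != 0;
 *
 * with the dirty bit then handed back to the VM object layer; that
 * hand-off is outside the scope of this file.
 */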