/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

uint32_t pmap_update_clear_pte_count;

/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
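
/*
 * Illustrative sketch only (not compiled): how the limits above feed the
 * checks in pmap_nest() below.  The concrete numbers assume the x86_64
 * constants used in this file (NBPDE == 2MiB):
 *
 *	// size and both start addresses must be NBPDE-aligned:
 *	//	(size     & (pmap_nesting_size_min - 1)) == 0
 *	//	(va_start & (pmap_nesting_size_min - 1)) == 0
 *	//	(nstart   & (pmap_nesting_size_min - 1)) == 0
 *	// and the "(size >> 28) > 65536" test caps the nested range at
 *	// 65536 * 2^28 bytes == 2^44 bytes == 16TB.
 */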

/*
 *	kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand    = the pmap that we will nest subord into
 *	subord   = the pmap that goes into the grand
 *	va_start = start of the range in the grand pmap at which subord is inserted
 *	nstart   = start of the corresponding range in the nested (subord) pmap
 *	size     = size of the nested area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher-level VM locks to ensure that things don't change while
 *	we are doing this.  For example, the VM should not perform any pmap enters while it is nesting,
 *	nor run two nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
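
/*
 * Hedged usage sketch (illustration only): the VM layer nests a shared
 * region pmap into a task pmap at identical offsets, e.g.
 *
 *	kr = pmap_nest(task_pmap, shared_region_pmap,
 *	               va_start, va_start, size);
 *
 * "task_pmap" and "shared_region_pmap" are hypothetical names; both the
 * addresses and the size must be NBPDE-aligned, and va_start must equal
 * nstart since pmap_nest() panics on disparate offsets.
 */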

kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde,*npde;
	unsigned int	i;
	uint64_t	num_pde;

	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;

	if(size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	if (va_start != nstart)
		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    (uintptr_t) grand, (uintptr_t) subord,
	    (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK(subord);

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK(grand);

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0)
				panic("pmap_nest: no PDPT, grand  %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				PMAP_UNLOCK(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0)
				panic("pmap_nest: no pde, grand  %p vaddr 0x%llx", grand, vaddr);
			vaddr += NBPDE;
			pmap_store_pte(pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}

/*
 *	kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand  = the pmap that we will un-nest subord from
 *	vaddr  = start of the range in the pmap to be unnested
 *	size   = size of the range to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */
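
/*
 * Illustrative note: pmap_unnest() rounds the request outward to PDE
 * boundaries before clearing entries,
 *
 *	va_start = vaddr & ~(NBPDE-1);				// round down
 *	va_end   = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);	// round up
 *
 * so any partially covered 2MiB region is unnested in its entirety.
 */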

kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {

	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    (uintptr_t) grand,
	    (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		    grand, vaddr, size);
	}

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	PMAP_LOCK(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}

/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	if (!cpu_64bit)
		return rval;

	PMAP_LOCK(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT -1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT -1));
		rval = TRUE;
	}

	PMAP_UNLOCK(p);

	return rval;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
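
/*
 * Worked example (assumes the usual x86_64 constants: 4KiB base pages,
 * 2MiB large pages, ptenum(va) == (va >> 12) & 0x1ff): for a 2MiB
 * mapping whose PDE points at physical 0x40000000, a query address
 * 0x5000 bytes into that mapping yields
 *
 *	i386_btop(pte_to_pa(pde)) == 0x40000
 *	ptenum(va)                == 0x5
 *
 * so the routine returns ppn 0x40005, the 4KiB frame actually backing
 * the queried address.
 */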
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */

	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		}
		else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */

void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
	pv_rooted_entry_t	pv_h, pv_e;
	pv_hashed_entry_t       pvh_e, nexth;
	vm_map_offset_t vaddr;
	pmap_t	pmap;
	pt_entry_t	*ptep;

	assert(IS_MANAGED_PAGE(pn));

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = pv_e->va;
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep)
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}

void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		__asm__ volatile("mfence");
		if (current_cpu_datap()->cpu_tlb_invalid)
			process_pmap_updates();
	}
}


/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
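
/*
 * Hedged call sketch (illustration only): wiring one page of physical
 * frame "pn" read/write into the kernel pmap might look like
 *
 *	pmap_enter(kernel_pmap, vaddr, pn,
 *	           VM_PROT_READ | VM_PROT_WRITE,	// prot
 *	           VM_PROT_NONE,			// fault_type (unused)
 *	           0,					// flags: default cacheability
 *	           TRUE);				// wired
 *
 * The actual work, including PV-list maintenance and TLB shootdowns, is
 * done by pmap_enter_options() below.
 */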

void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	vm_prot_t		fault_type,
	unsigned int 		flags,
	boolean_t		wired)
{
	(void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE);
}

kern_return_t
pmap_enter_options(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	__unused vm_prot_t	fault_type,
	unsigned int 		flags,
	boolean_t		wired,
	unsigned int		options)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	ppnum_t			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	int			delpage_pde_index = 0;
	pt_entry_t		old_pte;
	kern_return_t		kr_expand;

	pmap_intr_assert();

	if (pmap == PMAP_NULL)
		return KERN_INVALID_ARGUMENT;
	/* N.B. We can be supplied a zero page frame in the NOENTER case; the
	 * value is unused in that scenario.
	 */
	assert(pn != vm_page_fictitious_addr);

	if (pn == vm_page_guard_addr)
		return KERN_INVALID_ARGUMENT;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    pmap,
	    (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
	    pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;

	if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
		set_NX = FALSE;
	}

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */

	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	PMAP_LOCK(pmap);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if (superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	}
	if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
		PMAP_UNLOCK(pmap);
		return KERN_SUCCESS;
	}

	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = (int)pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		*pte = 0;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 *	Special case if the incoming physical page is already mapped
	 *	at this address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

		/*
		 *	May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa) | INTEL_PTE_VALID;
		template |= pmap_get_cache_attributes(pa_index(pa));

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE)
			template |= INTEL_PTE_WRITE;

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(old_attributes))  {
				OSAddAtomic(+1, &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
		if (superpage)		/* this path can not be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */
		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != INTEL_PTE_WIRED);

		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;
		do {
			opte = *pte;
			npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
		} while (!pmap_cmpx_pte(pte, opte, npte));
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto Done;
	}

	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {

		/*
		 *	Don't do anything to pages outside valid memory here.
		 *	Instead convince the code that enters a new mapping
		 *	to overwrite the old one.
		 */

		/* invalidate the PTE */
		pmap_update_pte(pte, INTEL_PTE_VALID, 0);
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte	= *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem,
				    PAGE_SIZE);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *      We'll end up with either a rooted pv or a
			 *      hashed pv
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

		} else {

			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */

			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
	}

	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {

		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						PMAP_UNLOCK(pmap);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		OSAddAtomic(+1,  &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		OSAddAtomic(+1,  &pmap->stats.resident_count);
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 *	Build a template to speed up entering -
	 *	only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;
	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */

	template |= pmap_get_cache_attributes(pa_index(pa));

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1,  & pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);

	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been stored
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE)
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}
	PMAP_UNLOCK(pmap);

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
		if (m == VM_PAGE_NULL)
		    panic("pmap_enter: pte page not in object");
		vm_object_unlock(delpage_pm_obj);
		VM_PAGE_FREE(m);
		OSAddAtomic(-1,  &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	return KERN_SUCCESS;
}

/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
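
/*
 * Assumption-level sketch of the caller contract: with the pmap locked
 * and the pte-page known to exist, removing the single 4KiB mapping at
 * "va" amounts to passing a one-entry PTE window, e.g.
 *
 *	pt_entry_t *spte = pmap_pte(pmap, va);
 *	pmap_remove_range(pmap, va, spte, spte + 1);
 *
 * pmap_remove() below constructs such windows one pde_mapped_size chunk
 * at a time.
 */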

void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t       pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	ppnum_t			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found   = 0;
	num_invalid = 0;
#if	defined(__i386__)
	if (pmap != kernel_pmap &&
	    pmap->pm_task_map == TASK_MAP_32BIT &&
	    start_vaddr >= HIGH_MEM_BASE) {
		/*
		 * The range is in the "high_shared_pde" which is shared
		 * between the kernel and all 32-bit tasks.  It holds
		 * the 32-bit commpage but also the trampolines, GDT, etc...
		 * so we can't let user tasks remove anything from it.
		 */
		return;
	}
#endif
	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0)
			continue;
		num_found++;

		if (iswired(p))
			num_unwired++;

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 *	Outside range of managed physical memory.
			 *	Just remove the mappings.
			 */
			pmap_store_pte(cpte, 0);
			continue;
		}

		if ((p & INTEL_PTE_VALID) == 0)
			num_invalid++;

		/* invalidate the PTE */
		pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
			(char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 *	Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
#endif
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed,  &pmap->stats.resident_count);

#if TESTING
	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
#endif
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
	pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

	return;
}


/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
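
/*
 * Chunking note with a worked example (assumes the usual x86_64 value
 * pde_mapped_size == 2MiB): the loop below advances one PDE's worth of
 * address space per iteration,
 *
 *	l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
 *
 * so for s64 == 0x7f0000003000 the chunk ends at l64 == 0x7f0000200000,
 * and the pmap lock is periodically dropped between chunks to bound
 * preemption latency.
 */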
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t     *pde;
	pt_entry_t     *spte, *epte;
	addr64_t        l64;
	uint64_t        deadline;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);


	PMAP_LOCK(map);

#if 0
	/*
	 * Check that address range in the kernel does not overlap the stacks.
	 * We initialize local static min/max variables once to avoid making
	 * 2 function calls for every remove. Note also that these functions
	 * both return 0 before kernel stacks have been initialized, and hence
	 * the panic is not triggered in this case.
	 */
	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
#else

	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies.  If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */

#endif

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range(map, s64, spte, epte);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			PMAP_UNLOCK(map)
			PMAP_LOCK(map)
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);

}

/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
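
/*
 * Behavior summary with example inputs (derived from the switch below):
 *
 *	pmap_page_protect(pn, VM_PROT_READ);			// write-protect all mappings
 *	pmap_page_protect(pn, VM_PROT_READ|VM_PROT_EXECUTE);	// same, execute kept
 *	pmap_page_protect(pn, VM_PROT_ALL);			// nothing to do
 *	pmap_page_protect(pn, VM_PROT_NONE);			// remove every mapping of pn
 */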
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	boolean_t		remove;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);


	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
				"pmap=%p pn=0x%x vaddr=0x%llx\n",
				pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {
			/*
			 * Remove the mapping, collecting dirty bits.
			 */
			pmap_update_pte(pte, INTEL_PTE_VALID, 0);

			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}

			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
			pmap_phys_attributes[pai] |=
			    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_store_pte(pte, 0);

#if TESTING
			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
#endif
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1,  &pmap->stats.resident_count);
			/*
			 * Deal with the pv_rooted_entry.
			 */

			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			pmap_phys_attributes[pai] |=
			    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);


	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
done:
	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	char			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the per-pmap lock
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */

		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t	va;

			pmap = pv_e->pmap;
			va = pv_e->va;

			 /*
			  * Clear modify and/or reference bits.
			  */
			pte = pmap_pte(pmap, va);
			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, bits, 0);
			/* Ensure all processors using this translation
			 * invalidate this TLB entry. The invalidation *must*
			 * follow the PTE update, to ensure that the TLB
			 * shadow of the 'D' bit (in particular) is
			 * synchronized with the updated PTE.
			 */
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/* Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */

	pmap_phys_attributes[pai] |= attributes;
	pmap_phys_attributes[pai] &= (~bits);

	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return 0;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return 0;
	}

	/*
	 * Fast check...  if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits)
		return bits;

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	attributes = pmap_phys_attributes[pai] & bits;


	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;
		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;
			/*
			 * pick up modify and/or reference bits from mapping
			 */

			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	UNLOCK_PVH(pai);
	return (attributes);
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	boolean_t	wired)
{
	pt_entry_t	*pte;

	PMAP_LOCK(map);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
		OSAddAtomic(+1,  &map->stats.wired_count);
		pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
	}
	else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1,  &map->stats.wired_count);
		pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
		pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
	}

	PMAP_UNLOCK(map);
}

/*
 *	"Backdoor" direct map routine for early mappings.
 *	Useful for mapping memory outside the range of managed
 *	physical memory (e.g. devices).
 *	Sets A, D and NC if requested
 */
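
/*
 * Hedged usage sketch (illustration only): mapping an uncached device
 * aperture during early boot might look like
 *
 *	next_va = pmap_map_bd(virt, phys_base, phys_base + len,
 *	                      VM_PROT_READ | VM_PROT_WRITE,
 *	                      VM_MEM_NOT_CACHEABLE);
 *
 * "virt", "phys_base" and "len" are placeholders; the return value is
 * the virtual address just past the mapped range.
 */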

vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;
	spl_t           spl;
	vm_offset_t	base = virt;
	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED)))
			template |= INTEL_PTE_PTA;
	}

#if    defined(__x86_64__)
	if ((prot & VM_PROT_EXECUTE) == 0)
		template |= INTEL_PTE_NX;
#endif

	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	while (start_addr < end_addr) {
		spl = splhigh();
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);
		splx(spl);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	flush_tlb_raw();
	PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
	return(virt);
}