--- head/sys/powerpc/pseries/mmu_phyp.c  (r278456, 2015-02-09)
+++ head/sys/powerpc/pseries/mmu_phyp.c  (r279252, 2015-02-24)
 /*
  * Copyright (C) 2010 Andreas Tobler
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/powerpc/pseries/mmu_phyp.c 278456 2015-02-09 15:58:27Z nwhitehorn $");
+__FBSDID("$FreeBSD: head/sys/powerpc/pseries/mmu_phyp.c 279252 2015-02-24 21:37:20Z nwhitehorn $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
-#include <sys/msgbuf.h>
+#include <sys/rwlock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vmmeter.h>
 
 #include <dev/ofw/openfirm.h>
 #include <machine/ofw_machdep.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/uma.h>
 
 #include <powerpc/aim/mmu_oea64.h>
 
 #include "mmu_if.h"
 #include "moea64_if.h"
 
 #include "phyp-hvcall.h"
 
 extern int n_slbs;
 
+static struct rwlock mphyp_eviction_lock;
+
 /*
  * Kernel MMU interface
  */
 
 static void     mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart,
                     vm_offset_t kernelend);
 static void     mphyp_cpu_bootstrap(mmu_t mmup, int ap);
-static void     mphyp_pte_synch(mmu_t, uintptr_t pt, struct lpte *pvo_pt);
-static void     mphyp_pte_clear(mmu_t, uintptr_t pt, struct lpte *pvo_pt,
-                    uint64_t vpn, u_int64_t ptebit);
-static void     mphyp_pte_unset(mmu_t, uintptr_t pt, struct lpte *pvo_pt,
-                    uint64_t vpn);
-static void     mphyp_pte_change(mmu_t, uintptr_t pt, struct lpte *pvo_pt,
-                    uint64_t vpn);
-static int      mphyp_pte_insert(mmu_t, u_int ptegidx, struct lpte *pvo_pt);
-static uintptr_t mphyp_pvo_to_pte(mmu_t, const struct pvo_entry *pvo);
+static int64_t  mphyp_pte_synch(mmu_t, struct pvo_entry *pvo);
+static int64_t  mphyp_pte_clear(mmu_t, struct pvo_entry *pvo, uint64_t ptebit);
+static int64_t  mphyp_pte_unset(mmu_t, struct pvo_entry *pvo);
+static int      mphyp_pte_insert(mmu_t, struct pvo_entry *pvo);
 
-#define VSID_HASH_MASK          0x0000007fffffffffULL
-
-
 static mmu_method_t mphyp_methods[] = {
         MMUMETHOD(mmu_bootstrap,        mphyp_bootstrap),
         MMUMETHOD(mmu_cpu_bootstrap,    mphyp_cpu_bootstrap),
 
         MMUMETHOD(moea64_pte_synch,     mphyp_pte_synch),
         MMUMETHOD(moea64_pte_clear,     mphyp_pte_clear),
         MMUMETHOD(moea64_pte_unset,     mphyp_pte_unset),
-        MMUMETHOD(moea64_pte_change,    mphyp_pte_change),
         MMUMETHOD(moea64_pte_insert,    mphyp_pte_insert),
-        MMUMETHOD(moea64_pvo_to_pte,    mphyp_pvo_to_pte),
 
+        /* XXX: pmap_copy_page, pmap_init_page with H_PAGE_INIT */
+
         { 0, 0 }
 };
 
 MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, 0, oea64_mmu);
 
+static int brokenkvm = 0;
+
 static void
+print_kvm_bug_warning(void *data)
+{
+
+        if (brokenkvm)
+                printf("WARNING: Running on a broken hypervisor that does "
+                    "not support mandatory H_CLEAR_MOD and H_CLEAR_REF "
+                    "hypercalls. Performance will be suboptimal.\n");
+}
+
+SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
+    print_kvm_bug_warning, NULL);
+SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning,
+    NULL);
+
+static void
 mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
 {
         uint64_t final_pteg_count = 0;
         char buf[8];
         uint32_t prop[2];
         uint32_t nptlp, shift = 0, slb_encoding = 0;
         uint32_t lp_size, lp_encoding;
         phandle_t dev, node, root;
         int idx, len, res;
 
+        rw_init(&mphyp_eviction_lock, "pte eviction");
+
         moea64_early_bootstrap(mmup, kernelstart, kernelend);
 
         root = OF_peer(0);
 
         dev = OF_child(root);
         while (dev != 0) {
                 res = OF_getprop(dev, "name", buf, sizeof(buf));
                 if (res > 0 && strcmp(buf, "cpus") == 0)
                         break;
                 dev = OF_peer(dev);
         }
 
         node = OF_child(dev);
 
         while (node != 0) {
                 res = OF_getprop(node, "device_type", buf, sizeof(buf));
                 if (res > 0 && strcmp(buf, "cpu") == 0)
                         break;
                 node = OF_peer(node);
         }
 
         res = OF_getprop(node, "ibm,pft-size", prop, sizeof(prop));
         if (res <= 0)
                 panic("mmu_phyp: unknown PFT size");
         final_pteg_count = 1 << prop[1];
         res = OF_getprop(node, "ibm,slb-size", prop, sizeof(prop[0]));
         if (res > 0)
                 n_slbs = prop[0];
 
         moea64_pteg_count = final_pteg_count / sizeof(struct lpteg);
 
         /*
          * Scan the large page size property for PAPR compatible machines.
          * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
          * for the encoding of the property.
          */
 
         len = OF_getproplen(node, "ibm,segment-page-sizes");
         if (len > 0) {
                 /*
                  * We have to use a variable length array on the stack
                  * since we have very limited stack space.
                  */
                 pcell_t arr[len/sizeof(cell_t)];
                 res = OF_getencprop(node, "ibm,segment-page-sizes", arr,
                     sizeof(arr));
                 len /= 4;
                 idx = 0;
                 while (len > 0) {
                         shift = arr[idx];
                         slb_encoding = arr[idx + 1];
                         nptlp = arr[idx + 2];
                         idx += 3;
                         len -= 3;
                         while (len > 0 && nptlp) {
                                 lp_size = arr[idx];
                                 lp_encoding = arr[idx+1];
                                 if (slb_encoding == SLBV_L && lp_encoding == 0)
                                         break;
 
                                 idx += 2;
                                 len -= 2;
                                 nptlp--;
                         }
                         if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
                                 break;
                 }
 
                 if (len == 0)
                         panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
                             "not supported by this system. Please enable huge "
                             "page backing if running under PowerKVM.");
 
                 moea64_large_page_shift = shift;
                 moea64_large_page_size = 1ULL << lp_size;
         }
 
         moea64_mid_bootstrap(mmup, kernelstart, kernelend);
         moea64_late_bootstrap(mmup, kernelstart, kernelend);
+
+        /* Test for broken versions of KVM that don't conform to the spec */
+        if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION)
+                brokenkvm = 1;
 }
 
 static void
 mphyp_cpu_bootstrap(mmu_t mmup, int ap)
 {
         struct slb *slb = PCPU_GET(slb);
         register_t seg0;
         int i;
 
         /*
          * Install kernel SLB entries
          */
 
         __asm __volatile ("slbia");
         __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0));
         for (i = 0; i < 64; i++) {
                 if (!(slb[i].slbe & SLBE_VALID))
                         continue;
 
                 __asm __volatile ("slbmte %0, %1" ::
                     "r"(slb[i].slbv), "r"(slb[i].slbe));
         }
 }
 
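A note on the "ibm,segment-page-sizes" loop in mphyp_bootstrap() above: PAPR lays the property out as a flat cell array, one record per supported segment page size, each record trailed by a variable-length list of actual-page-size pairs. A hedged sketch of the layout the parser assumes, with illustrative cell values for a machine exposing 4 KB and 16 MB pages (the concrete numbers here are hypothetical, not read from any real system):

    /*
     * Record layout assumed by the while loop:
     *   cell[0] shift         segment base page shift
     *   cell[1] slb_encoding  SLB L/LP selector bits for this size
     *   cell[2] nptlp         number of (lp_size, lp_encoding) pairs
     *   ...followed by nptlp pairs of cells.
     *
     * Illustrative contents:
     *   { 12, 0x000, 1,  12, 0x0000,      4 KB:  shift 12, penc 0
     *     24, 0x100, 1,  24, 0x0000 }     16 MB: SLB[L] set, LP = 0
     *
     * The scan stops at the first record with SLBV_L set and a zero LP
     * encoding -- the "standard large page" the panic message refers
     * to -- after which 1ULL << lp_size becomes the large page size
     * (16 MB for lp_size == 24).
     */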
-static void
-mphyp_pte_synch(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt)
+static int64_t
+mphyp_pte_synch(mmu_t mmu, struct pvo_entry *pvo)
 {
         struct lpte pte;
         uint64_t junk;
 
         __asm __volatile("ptesync");
-        phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pte.pte_hi, &pte.pte_lo,
-            &junk);
+        phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi,
+            &pte.pte_lo, &junk);
+        if ((pte.pte_hi & LPTE_AVPN_MASK) !=
+            ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) &
+            LPTE_AVPN_MASK))
+                return (-1);
+        if (!(pte.pte_hi & LPTE_VALID))
+                return (-1);
 
-        pvo_pt->pte_lo |= pte.pte_lo & (LPTE_CHG | LPTE_REF);
+        return (pte.pte_lo & (LPTE_CHG | LPTE_REF));
 }
 
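Two behavioral points in the new mphyp_pte_synch() are easy to miss. First, it can now report failure: because the hypervisor owns the page table and the eviction path in mphyp_pte_insert() may recycle any unwired slot, the function checks that the slot still holds this PVO's mapping (abbreviated-VPN match plus the valid bit) and returns -1 when it does not. Second, rather than folding the REF/CHG bits back into a caller-owned shadow struct lpte, it returns them directly, which is what lets the shadow page table drop out of this interface.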
-static void
-mphyp_pte_clear(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn,
-    u_int64_t ptebit)
+static int64_t
+mphyp_pte_clear(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit)
 {
+        int64_t refchg;
+        uint64_t ptelo, junk;
+        int err;
 
-        if (ptebit & LPTE_CHG)
-                phyp_hcall(H_CLEAR_MOD, 0, slot);
-        if (ptebit & LPTE_REF)
-                phyp_hcall(H_CLEAR_REF, 0, slot);
+        /*
+         * This involves two steps (synch and clear) so we need the entry
+         * not to change in the middle. We are protected against deliberate
+         * unset by virtue of holding the pmap lock. Protection against
+         * incidental unset (page table eviction) comes from holding the
+         * shared eviction lock.
+         */
+        PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
+        rw_rlock(&mphyp_eviction_lock);
+
+        refchg = mphyp_pte_synch(mmu, pvo);
+        if (refchg < 0) {
+                rw_runlock(&mphyp_eviction_lock);
+                return (refchg);
+        }
+
+        if (brokenkvm) {
+                /*
+                 * No way to clear either bit, which is total madness.
+                 * Pessimistically claim that, once modified, it stays so
+                 * forever and that it is never referenced.
+                 */
+                rw_runlock(&mphyp_eviction_lock);
+                return (refchg & ~LPTE_REF);
+        }
+
+        if (ptebit & LPTE_CHG) {
+                err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0,
+                    &ptelo, &junk, &junk);
+                KASSERT(err == H_SUCCESS,
+                    ("Error clearing page change bit: %d", err));
+                refchg |= (ptelo & LPTE_CHG);
+        }
+        if (ptebit & LPTE_REF) {
+                err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0,
+                    &ptelo, &junk, &junk);
+                KASSERT(err == H_SUCCESS,
+                    ("Error clearing page reference bit: %d", err));
+                refchg |= (ptelo & LPTE_REF);
+        }
+
+        rw_runlock(&mphyp_eviction_lock);
+
+        return (refchg);
 }
 
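The pte_clear conversion above shows the new calling convention at its clearest: the method takes the PVO, returns the accumulated REF/CHG bits, and signals eviction with a negative value. A minimal usage sketch from a hypothetical moea64-level caller (the real consumers live in mmu_oea64.c; the vm_page pointer `m` and its surrounding locking are assumed here, and MOEA64_PTE_CLEAR is the generated moea64_if dispatch this diff retargets):

    int64_t refchg;

    refchg = MOEA64_PTE_CLEAR(mmu, pvo, LPTE_CHG);
    if (refchg < 0)
            refchg = 0;             /* PTE evicted; nothing to fold in */
    if (refchg & LPTE_CHG)
            vm_page_dirty(m);       /* page was modified since last clear */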
-static void
-mphyp_pte_unset(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn)
+static int64_t
+mphyp_pte_unset(mmu_t mmu, struct pvo_entry *pvo)
 {
         struct lpte pte;
         uint64_t junk;
         int err;
 
-        pvo_pt->pte_hi &= ~LPTE_VALID;
-        err = phyp_pft_hcall(H_REMOVE, 1UL << 31, slot,
-            pvo_pt->pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
-            &junk);
-        KASSERT(err == H_SUCCESS, ("Error removing page: %d", err));
+        PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
 
-        pvo_pt->pte_lo |= pte.pte_lo & (LPTE_CHG | LPTE_REF);
-}
+        moea64_pte_from_pvo(pvo, &pte);
 
-static void
-mphyp_pte_change(mmu_t mmu, uintptr_t slot, struct lpte *pvo_pt, uint64_t vpn)
-{
-        struct lpte evicted;
-        uint64_t index, junk;
-        int64_t result;
+        err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
+            pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
+            &junk);
+        KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
+            ("Error removing page: %d", err));
 
-        /*
-         * NB: this is protected by the global table lock, so this two-step
-         * is safe, except for the scratch-page case. No CPUs on which we run
-         * this code should be using scratch pages.
-         */
-        KASSERT(!(pvo_pt->pte_hi & LPTE_LOCKED),
-            ("Locked pages not supported on PHYP"));
+        if (err == H_NOT_FOUND) {
+                moea64_pte_overflow--;
+                return (-1);
+        }
 
-        /* XXX: optimization using H_PROTECT for common case? */
-        mphyp_pte_unset(mmu, slot, pvo_pt, vpn);
-        pvo_pt->pte_hi |= LPTE_VALID;
-        result = phyp_pft_hcall(H_ENTER, H_EXACT, slot, pvo_pt->pte_hi,
-            pvo_pt->pte_lo, &index, &evicted.pte_lo, &junk);
-        if (result != H_SUCCESS)
-                panic("mphyp_pte_change() insertion failure: %ld\n", result);
+        return (pte.pte_lo & (LPTE_REF | LPTE_CHG));
 }
 
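Note the flag change in the H_REMOVE call: the old code passed the raw bit 1UL << 31, which appears to be the same PAPR flag the new code spells with the named constant H_AVPN. With that flag set, the hypervisor compares the supplied abbreviated virtual page number against the PTE before removing it, so a slot that was evicted and reused for an unrelated mapping is left intact and H_NOT_FOUND comes back instead. That is what makes the new H_NOT_FOUND branch meaningful: the mapping was already torn down by the eviction path, so mphyp_pte_unset() simply rebalances the moea64_pte_overflow counter that mphyp_pte_insert() incremented and reports -1 (no REF/CHG bits recovered).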
-static __inline int
-mphyp_pte_spillable_ident(u_int ptegidx, struct lpte *to_evict)
+static uintptr_t
+mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict)
 {
         uint64_t slot, junk, k;
         struct lpte pt;
         int i, j;
 
         /* Start at a random slot */
         i = mftb() % 8;
         k = -1;
         for (j = 0; j < 8; j++) {
-                slot = (ptegidx << 3) + (i + j) % 8;
-                phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi, &pt.pte_lo,
-                    &junk);
+                slot = ptegbase + (i + j) % 8;
+                phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
+                    &pt.pte_lo, &junk);
 
                 if (pt.pte_hi & LPTE_WIRED)
                         continue;
 
                 /* This is a candidate, so remember it */
                 k = slot;
 
                 /* Try to get a page that has not been used lately */
-                if (!(pt.pte_lo & LPTE_REF)) {
+                if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) {
                         memcpy(to_evict, &pt, sizeof(struct lpte));
                         return (k);
                 }
         }
 
         if (k == -1)
                 return (k);
 
         phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi,
             &to_evict->pte_lo, &junk);
         return (k);
 }
 
 static int
-mphyp_pte_insert(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt)
+mphyp_pte_insert(mmu_t mmu, struct pvo_entry *pvo)
 {
         int64_t result;
-        struct lpte evicted;
-        struct pvo_entry *pvo;
-        uint64_t index, junk;
-        u_int pteg_bktidx;
+        struct lpte evicted, pte;
+        uint64_t index, junk, lastptelo;
 
-        /* Check for locked pages, which we can't support on this system */
-        KASSERT(!(pvo_pt->pte_hi & LPTE_LOCKED),
-            ("Locked pages not supported on PHYP"));
+        PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
 
         /* Initialize PTE */
-        pvo_pt->pte_hi |= LPTE_VALID;
-        pvo_pt->pte_hi &= ~LPTE_HID;
+        moea64_pte_from_pvo(pvo, &pte);
         evicted.pte_hi = 0;
 
+        /* Make sure further insertion is locked out during evictions */
+        rw_rlock(&mphyp_eviction_lock);
+
         /*
          * First try primary hash.
          */
-        pteg_bktidx = ptegidx;
-        result = phyp_pft_hcall(H_ENTER, 0, pteg_bktidx << 3, pvo_pt->pte_hi,
-            pvo_pt->pte_lo, &index, &evicted.pte_lo, &junk);
-        if (result == H_SUCCESS)
-                return (index & 0x07);
+        pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
+        result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi,
+            pte.pte_lo, &index, &evicted.pte_lo, &junk);
+        if (result == H_SUCCESS) {
+                rw_runlock(&mphyp_eviction_lock);
+                pvo->pvo_pte.slot = index;
+                return (0);
+        }
         KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld "
-            "(ptegidx: %#x/%#x, PTE %#lx/%#lx", result, ptegidx,
-            moea64_pteg_count, pvo_pt->pte_hi, pvo_pt->pte_lo));
+            "(ptegidx: %#zx/%#x, PTE %#lx/%#lx", result, pvo->pvo_pte.slot,
+            moea64_pteg_count, pte.pte_hi, pte.pte_lo));
 
         /*
          * Next try secondary hash.
          */
-        pteg_bktidx ^= moea64_pteg_mask;
-        pvo_pt->pte_hi |= LPTE_HID;
-        result = phyp_pft_hcall(H_ENTER, 0, pteg_bktidx << 3,
-            pvo_pt->pte_hi, pvo_pt->pte_lo, &index, &evicted.pte_lo, &junk);
-        if (result == H_SUCCESS)
-                return (index & 0x07);
+        pvo->pvo_vaddr ^= PVO_HID;
+        pte.pte_hi ^= LPTE_HID;
+        pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
+
+        result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot,
+            pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk);
+        if (result == H_SUCCESS) {
+                rw_runlock(&mphyp_eviction_lock);
+                pvo->pvo_pte.slot = index;
+                return (0);
+        }
         KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld",
             result));
 
         /*
          * Out of luck. Find a PTE to sacrifice.
          */
-        pteg_bktidx = ptegidx;
-        index = mphyp_pte_spillable_ident(pteg_bktidx, &evicted);
+
+        /* Lock out all insertions for a bit */
+        if (!rw_try_upgrade(&mphyp_eviction_lock)) {
+                rw_runlock(&mphyp_eviction_lock);
+                rw_wlock(&mphyp_eviction_lock);
+        }
+
+        index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
         if (index == -1L) {
-                pteg_bktidx ^= moea64_pteg_mask;
-                index = mphyp_pte_spillable_ident(pteg_bktidx, &evicted);
+                /* Try other hash table? */
+                pvo->pvo_vaddr ^= PVO_HID;
+                pte.pte_hi ^= LPTE_HID;
+                pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
+                index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
         }
 
         if (index == -1L) {
                 /* No freeable slots in either PTEG? We're hosed. */
+                rw_wunlock(&mphyp_eviction_lock);
                 panic("mphyp_pte_insert: overflow");
                 return (-1);
         }
 
-        if (pteg_bktidx == ptegidx)
-                pvo_pt->pte_hi &= ~LPTE_HID;
-        else
-                pvo_pt->pte_hi |= LPTE_HID;
-
-        /*
-         * Synchronize the sacrifice PTE with its PVO, then mark both
-         * invalid. The PVO will be reused when/if the VM system comes
-         * here after a fault.
-         */
-
-        if (evicted.pte_hi & LPTE_HID)
-                pteg_bktidx ^= moea64_pteg_mask; /* PTEs indexed by primary */
-
-        LIST_FOREACH(pvo, &moea64_pvo_table[pteg_bktidx], pvo_olink) {
-                if (pvo->pvo_pte.lpte.pte_hi == evicted.pte_hi) {
-                        KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID,
-                            ("Invalid PVO for valid PTE!"));
-                        mphyp_pte_unset(mmu, index, &pvo->pvo_pte.lpte,
-                            pvo->pvo_vpn);
-                        PVO_PTEGIDX_CLR(pvo);
-                        moea64_pte_overflow++;
-                        break;
-                }
+        /* Victim acquired: update page before waving goodbye */
+        if (evicted.pte_hi & LPTE_VALID) {
+                result = phyp_pft_hcall(H_REMOVE, H_AVPN, index,
+                    evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo,
+                    &junk);
+                moea64_pte_overflow++;
+                KASSERT(result == H_SUCCESS,
+                    ("Error evicting page: %d", (int)result));
         }
 
-        KASSERT((pvo->pvo_pte.lpte.pte_hi | LPTE_VALID) == evicted.pte_hi,
-            ("Unable to find PVO for spilled PTE"));
-
         /*
          * Set the new PTE.
          */
-        result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pvo_pt->pte_hi,
-            pvo_pt->pte_lo, &index, &evicted.pte_lo, &junk);
+        result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi,
+            pte.pte_lo, &index, &evicted.pte_lo, &junk);
+        rw_wunlock(&mphyp_eviction_lock); /* All clear */
+
+        pvo->pvo_pte.slot = index;
         if (result == H_SUCCESS)
-                return (index & 0x07);
+                return (0);
 
         panic("Page replacement error: %ld", result);
-        return (-1);
+        return (result);
 }
 
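The locking scheme threaded through mphyp_pte_insert() deserves a summary: ordinary insertions take the eviction lock read-side only, so they proceed in parallel, and a thread that must evict upgrades to the write side to quiesce all other inserters first. A generic sketch of the rwlock(9) upgrade idiom used above, detached from this function (`need_evict` is a stand-in for both H_ENTER attempts having returned H_PTEG_FULL):

    rw_rlock(&lock);
    /* ... fast path under the shared lock ... */
    if (need_evict && !rw_try_upgrade(&lock)) {
            /*
             * The in-place upgrade fails if other readers hold the lock:
             * drop it and reacquire exclusively. Anything observed under
             * the read lock must be revalidated afterwards.
             */
            rw_runlock(&lock);
            rw_wlock(&lock);
    }
    /* ... evict and insert, then rw_wunlock(&lock) ... */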
-static __inline u_int
-va_to_pteg(uint64_t vsid, vm_offset_t addr, int large)
-{
-        uint64_t hash;
-        int shift;
-
-        shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT;
-        hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >>
-            shift);
-        return (hash & moea64_pteg_mask);
-}
-
-static uintptr_t
-mphyp_pvo_to_pte(mmu_t mmu, const struct pvo_entry *pvo)
-{
-        uint64_t vsid;
-        u_int ptegidx;
-
-        /* If the PTEG index is not set, then there is no page table entry */
-        if (!PVO_PTEGIDX_ISSET(pvo))
-                return (-1);
-
-        vsid = PVO_VSID(pvo);
-        ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE);
-
-        /*
-         * We can find the actual pte entry without searching by grabbing
-         * the PTEG index from 3 unused bits in pvo_vaddr and by
-         * noticing the HID bit.
-         */
-        if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID)
-                ptegidx ^= moea64_pteg_mask;
-
-        return ((ptegidx << 3) | PVO_PTEGIDX_GET(pvo));
-}
-