/*
 * Copyright (C) 2010 Andreas Tobler
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
--- 10 unchanged lines hidden ---
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/powerpc/pseries/mmu_phyp.c 279252 2015-02-24 21:37:20Z nwhitehorn $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vmmeter.h>

#include <dev/ofw/openfirm.h>
#include <machine/ofw_machdep.h>
--- 12 unchanged lines hidden ---

#include "mmu_if.h"
#include "moea64_if.h"

#include "phyp-hvcall.h"

extern int n_slbs;

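/*
 * The eviction lock serializes page-table eviction against the other PTE
 * operations below: mphyp_pte_clear() and mphyp_pte_insert() take it shared
 * so a mapping cannot be evicted out from under them, while the eviction
 * path in mphyp_pte_insert() upgrades to the exclusive lock before
 * sacrificing a victim slot.
 */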
static struct rwlock mphyp_eviction_lock;

/*
 * Kernel MMU interface
 */

static void	mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart,
		    vm_offset_t kernelend);
static void	mphyp_cpu_bootstrap(mmu_t mmup, int ap);
static int64_t	mphyp_pte_synch(mmu_t, struct pvo_entry *pvo);
static int64_t	mphyp_pte_clear(mmu_t, struct pvo_entry *pvo, uint64_t ptebit);
static int64_t	mphyp_pte_unset(mmu_t, struct pvo_entry *pvo);
static int	mphyp_pte_insert(mmu_t, struct pvo_entry *pvo);

static mmu_method_t mphyp_methods[] = {
	MMUMETHOD(mmu_bootstrap,	mphyp_bootstrap),
	MMUMETHOD(mmu_cpu_bootstrap,	mphyp_cpu_bootstrap),

	MMUMETHOD(moea64_pte_synch,	mphyp_pte_synch),
	MMUMETHOD(moea64_pte_clear,	mphyp_pte_clear),
	MMUMETHOD(moea64_pte_unset,	mphyp_pte_unset),
	MMUMETHOD(moea64_pte_insert,	mphyp_pte_insert),

	/* XXX: pmap_copy_page, pmap_init_page with H_PAGE_INIT */

	{ 0, 0 }
};

MMU_DEF_INHERIT(pseries_mmu, "mmu_phyp", mphyp_methods, 0, oea64_mmu);

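/* Set at bootstrap if the hypervisor lacks the H_CLEAR_MOD/H_CLEAR_REF calls. */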
static int brokenkvm = 0;

static void
print_kvm_bug_warning(void *data)
{

	if (brokenkvm)
		printf("WARNING: Running on a broken hypervisor that does "
		    "not support mandatory H_CLEAR_MOD and H_CLEAR_REF "
		    "hypercalls. Performance will be suboptimal.\n");
}

SYSINIT(kvmbugwarn1, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
    print_kvm_bug_warning, NULL);
SYSINIT(kvmbugwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_kvm_bug_warning,
    NULL);

static void
mphyp_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
{
	uint64_t final_pteg_count = 0;
	char buf[8];
	uint32_t prop[2];
	uint32_t nptlp, shift = 0, slb_encoding = 0;
	uint32_t lp_size, lp_encoding;
	phandle_t dev, node, root;
	int idx, len, res;

	rw_init(&mphyp_eviction_lock, "pte eviction");

	moea64_early_bootstrap(mmup, kernelstart, kernelend);

	root = OF_peer(0);

	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
--- 63 unchanged lines hidden ---
		    "page backing if running under PowerKVM.");

		moea64_large_page_shift = shift;
		moea64_large_page_size = 1ULL << lp_size;
	}

	moea64_mid_bootstrap(mmup, kernelstart, kernelend);
	moea64_late_bootstrap(mmup, kernelstart, kernelend);

	/* Test for broken versions of KVM that don't conform to the spec */
	if (phyp_hcall(H_CLEAR_MOD, 0, 0) == H_FUNCTION)
		brokenkvm = 1;
}

static void
mphyp_cpu_bootstrap(mmu_t mmup, int ap)
{
	struct slb *slb = PCPU_GET(slb);
	register_t seg0;
	int i;
--- 8 unchanged lines hidden ---
		if (!(slb[i].slbe & SLBE_VALID))
			continue;

		__asm __volatile ("slbmte %0, %1" ::
		    "r"(slb[i].slbv), "r"(slb[i].slbe));
	}
}

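/*
 * Read the hypervisor's copy of the PTE for a PVO and return its
 * referenced/changed bits, or -1 if the slot no longer holds a valid
 * mapping for this PVO.
 */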
static int64_t
mphyp_pte_synch(mmu_t mmu, struct pvo_entry *pvo)
{
	struct lpte pte;
	uint64_t junk;

	__asm __volatile("ptesync");
	phyp_pft_hcall(H_READ, 0, pvo->pvo_pte.slot, 0, 0, &pte.pte_hi,
	    &pte.pte_lo, &junk);
	if ((pte.pte_hi & LPTE_AVPN_MASK) !=
	    ((pvo->pvo_vpn >> (ADDR_API_SHFT64 - ADDR_PIDX_SHFT)) &
	    LPTE_AVPN_MASK))
		return (-1);
	if (!(pte.pte_hi & LPTE_VALID))
		return (-1);

	return (pte.pte_lo & (LPTE_CHG | LPTE_REF));
}

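/*
 * Clear the requested reference/change bits in the hypervisor's copy of
 * the PTE and return their value prior to clearing, or a negative value
 * if the PTE is no longer present.
 */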
static int64_t
mphyp_pte_clear(mmu_t mmu, struct pvo_entry *pvo, uint64_t ptebit)
{
	int64_t refchg;
	uint64_t ptelo, junk;
	int err;

	/*
	 * This involves two steps (synch and clear) so we need the entry
	 * not to change in the middle. We are protected against deliberate
	 * unset by virtue of holding the pmap lock. Protection against
	 * incidental unset (page table eviction) comes from holding the
	 * shared eviction lock.
	 */
	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);
	rw_rlock(&mphyp_eviction_lock);

	refchg = mphyp_pte_synch(mmu, pvo);
	if (refchg < 0) {
		rw_runlock(&mphyp_eviction_lock);
		return (refchg);
	}

	if (brokenkvm) {
		/*
		 * No way to clear either bit, which is total madness.
		 * Pessimistically claim that, once modified, it stays so
		 * forever and that it is never referenced.
		 */
		rw_runlock(&mphyp_eviction_lock);
		return (refchg & ~LPTE_REF);
	}

	if (ptebit & LPTE_CHG) {
		err = phyp_pft_hcall(H_CLEAR_MOD, 0, pvo->pvo_pte.slot, 0, 0,
		    &ptelo, &junk, &junk);
		KASSERT(err == H_SUCCESS,
		    ("Error clearing page change bit: %d", err));
		refchg |= (ptelo & LPTE_CHG);
	}
	if (ptebit & LPTE_REF) {
		err = phyp_pft_hcall(H_CLEAR_REF, 0, pvo->pvo_pte.slot, 0, 0,
		    &ptelo, &junk, &junk);
		KASSERT(err == H_SUCCESS,
		    ("Error clearing page reference bit: %d", err));
		refchg |= (ptelo & LPTE_REF);
	}

	rw_runlock(&mphyp_eviction_lock);

	return (refchg);
}

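/*
 * Remove the PTE for a PVO from the hypervisor page table and return
 * its reference/change bits. If the hypervisor no longer has the entry
 * (it was previously evicted), account for that and return -1.
 */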
static int64_t
mphyp_pte_unset(mmu_t mmu, struct pvo_entry *pvo)
{
	struct lpte pte;
	uint64_t junk;
	int err;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	moea64_pte_from_pvo(pvo, &pte);

	err = phyp_pft_hcall(H_REMOVE, H_AVPN, pvo->pvo_pte.slot,
	    pte.pte_hi & LPTE_AVPN_MASK, 0, &pte.pte_hi, &pte.pte_lo,
	    &junk);
	KASSERT(err == H_SUCCESS || err == H_NOT_FOUND,
	    ("Error removing page: %d", err));

	if (err == H_NOT_FOUND) {
		moea64_pte_overflow--;
		return (-1);
	}

	return (pte.pte_lo & (LPTE_REF | LPTE_CHG));
}

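/*
 * Scan the eight slots of the PTEG starting at a pseudo-random offset
 * and pick a non-wired entry to evict, preferring slots that are invalid
 * or have not been referenced recently. Returns the chosen slot, with its
 * contents copied into *to_evict, or -1 if every slot in the group is
 * wired.
 */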
static uintptr_t
mphyp_pte_spillable_ident(uintptr_t ptegbase, struct lpte *to_evict)
{
	uint64_t slot, junk, k;
	struct lpte pt;
	int i, j;

	/* Start at a random slot */
	i = mftb() % 8;
	k = -1;
	for (j = 0; j < 8; j++) {
		slot = ptegbase + (i + j) % 8;
		phyp_pft_hcall(H_READ, 0, slot, 0, 0, &pt.pte_hi,
		    &pt.pte_lo, &junk);

		if (pt.pte_hi & LPTE_WIRED)
			continue;

		/* This is a candidate, so remember it */
		k = slot;

		/* Try to get a page that has not been used lately */
		if (!(pt.pte_hi & LPTE_VALID) || !(pt.pte_lo & LPTE_REF)) {
			memcpy(to_evict, &pt, sizeof(struct lpte));
			return (k);
		}
	}

	if (k == -1)
		return (k);

	phyp_pft_hcall(H_READ, 0, k, 0, 0, &to_evict->pte_hi,
	    &to_evict->pte_lo, &junk);
	return (k);
}

static int
mphyp_pte_insert(mmu_t mmu, struct pvo_entry *pvo)
{
	int64_t result;
	struct lpte evicted, pte;
	uint64_t index, junk, lastptelo;

	PMAP_LOCK_ASSERT(pvo->pvo_pmap, MA_OWNED);

	/* Initialize PTE */
	moea64_pte_from_pvo(pvo, &pte);
	evicted.pte_hi = 0;

	/* Make sure further insertion is locked out during evictions */
	rw_rlock(&mphyp_eviction_lock);

	/*
	 * First try primary hash.
	 */
	pvo->pvo_pte.slot &= ~7UL; /* Base slot address */
	result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot, pte.pte_hi,
	    pte.pte_lo, &index, &evicted.pte_lo, &junk);
	if (result == H_SUCCESS) {
		rw_runlock(&mphyp_eviction_lock);
		pvo->pvo_pte.slot = index;
		return (0);
	}
	KASSERT(result == H_PTEG_FULL, ("Page insertion error: %ld "
	    "(ptegidx: %#zx/%#x, PTE %#lx/%#lx", result, pvo->pvo_pte.slot,
	    moea64_pteg_count, pte.pte_hi, pte.pte_lo));

	/*
	 * Next try secondary hash.
	 */
	pvo->pvo_vaddr ^= PVO_HID;
	pte.pte_hi ^= LPTE_HID;
	pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);

	result = phyp_pft_hcall(H_ENTER, 0, pvo->pvo_pte.slot,
	    pte.pte_hi, pte.pte_lo, &index, &evicted.pte_lo, &junk);
	if (result == H_SUCCESS) {
		rw_runlock(&mphyp_eviction_lock);
		pvo->pvo_pte.slot = index;
		return (0);
	}
	KASSERT(result == H_PTEG_FULL, ("Secondary page insertion error: %ld",
	    result));

	/*
	 * Out of luck. Find a PTE to sacrifice.
	 */

	/* Lock out all insertions for a bit */
	if (!rw_try_upgrade(&mphyp_eviction_lock)) {
		rw_runlock(&mphyp_eviction_lock);
		rw_wlock(&mphyp_eviction_lock);
	}

	index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
	if (index == -1L) {
		/* Try other hash table? */
		pvo->pvo_vaddr ^= PVO_HID;
		pte.pte_hi ^= LPTE_HID;
		pvo->pvo_pte.slot ^= (moea64_pteg_mask << 3);
		index = mphyp_pte_spillable_ident(pvo->pvo_pte.slot, &evicted);
	}

	if (index == -1L) {
		/* No freeable slots in either PTEG? We're hosed. */
		rw_wunlock(&mphyp_eviction_lock);
		panic("mphyp_pte_insert: overflow");
		return (-1);
	}

	/* Victim acquired: update page before waving goodbye */
	if (evicted.pte_hi & LPTE_VALID) {
		result = phyp_pft_hcall(H_REMOVE, H_AVPN, index,
		    evicted.pte_hi & LPTE_AVPN_MASK, 0, &junk, &lastptelo,
		    &junk);
		moea64_pte_overflow++;
		KASSERT(result == H_SUCCESS,
		    ("Error evicting page: %d", (int)result));
	}

	/*
	 * Set the new PTE.
	 */
	result = phyp_pft_hcall(H_ENTER, H_EXACT, index, pte.pte_hi,
	    pte.pte_lo, &index, &evicted.pte_lo, &junk);
	rw_wunlock(&mphyp_eviction_lock); /* All clear */

	pvo->pvo_pte.slot = index;
	if (result == H_SUCCESS)
		return (0);

	panic("Page replacement error: %ld", result);
	return (result);
}
