1/* 2 * vtlb.c: guest virtual tlb handling module. 3 * Copyright (c) 2004, Intel Corporation. 4 * Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com> 5 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> 6 * 7 * Copyright (c) 2007, Intel Corporation. 8 * Xuefei Xu (Anthony Xu) <anthony.xu@intel.com> 9 * Xiantao Zhang <xiantao.zhang@intel.com> 10 * 11 * This program is free software; you can redistribute it and/or modify it 12 * under the terms and conditions of the GNU General Public License, 13 * version 2, as published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope it will be useful, but WITHOUT 16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 17 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 18 * more details. 19 * 20 * You should have received a copy of the GNU General Public License along with 21 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 22 * Place - Suite 330, Boston, MA 02111-1307 USA. 23 * 24 */ 25 26#include "vcpu.h" 27 28#include <linux/rwsem.h> 29 30#include <asm/tlb.h> 31 32/* 33 * Check to see if the address rid:va is translated by the TLB 34 */ 35 36static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) 37{ 38 return ((trp->p) && (trp->rid == rid) 39 && ((va-trp->vadr) < PSIZE(trp->ps))); 40} 41 42/* 43 * Only for GUEST TR format. 44 */ 45static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) 46{ 47 u64 sa1, ea1; 48 49 if (!trp->p || trp->rid != rid) 50 return 0; 51 52 sa1 = trp->vadr; 53 ea1 = sa1 + PSIZE(trp->ps) - 1; 54 eva -= 1; 55 if ((sva > ea1) || (sa1 > eva)) 56 return 0; 57 else 58 return 1; 59 60} 61 62void machine_tlb_purge(u64 va, u64 ps) 63{ 64 ia64_ptcl(va, ps << 2); 65} 66 67void local_flush_tlb_all(void) 68{ 69 int i, j; 70 unsigned long flags, count0, count1; 71 unsigned long stride0, stride1, addr; 72 73 addr = current_vcpu->arch.ptce_base; 74 count0 = current_vcpu->arch.ptce_count[0]; 75 count1 = current_vcpu->arch.ptce_count[1]; 76 stride0 = current_vcpu->arch.ptce_stride[0]; 77 stride1 = current_vcpu->arch.ptce_stride[1]; 78 79 local_irq_save(flags); 80 for (i = 0; i < count0; ++i) { 81 for (j = 0; j < count1; ++j) { 82 ia64_ptce(addr); 83 addr += stride1; 84 } 85 addr += stride0; 86 } 87 local_irq_restore(flags); 88 ia64_srlz_i(); /* srlz.i implies srlz.d */ 89} 90 91int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) 92{ 93 union ia64_rr vrr; 94 union ia64_pta vpta; 95 struct ia64_psr vpsr; 96 97 vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); 98 vrr.val = vcpu_get_rr(vcpu, vadr); 99 vpta.val = vcpu_get_pta(vcpu); 100 101 if (vrr.ve & vpta.ve) { 102 switch (ref) { 103 case DATA_REF: 104 case NA_REF: 105 return vpsr.dt; 106 case INST_REF: 107 return vpsr.dt && vpsr.it && vpsr.ic; 108 case RSE_REF: 109 return vpsr.dt && vpsr.rt; 110 111 } 112 } 113 return 0; 114} 115 116struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) 117{ 118 u64 index, pfn, rid, pfn_bits; 119 120 pfn_bits = vpta.size - 5 - 8; 121 pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); 122 rid = _REGION_ID(vrr); 123 index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); 124 *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); 125 126 return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + 127 (index << 5)); 128} 129 130struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) 131{ 132 133 struct thash_data *trp; 134 int i; 135 u64 rid; 136 137 rid = vcpu_get_rr(vcpu, va); 138 rid = rid & RR_RID_MASK; 139 if (type == D_TLB) { 140 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { 141 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; 142 i < NDTRS; i++, trp++) { 143 if (__is_tr_translated(trp, rid, va)) 144 return trp; 145 } 146 } 147 } else { 148 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { 149 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; 150 i < NITRS; i++, trp++) { 151 if (__is_tr_translated(trp, rid, va)) 152 return trp; 153 } 154 } 155 } 156 157 return NULL; 158} 159 160static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) 161{ 162 union ia64_rr rr; 163 struct thash_data *head; 164 unsigned long ps, gpaddr; 165 166 ps = itir_ps(itir); 167 rr.val = ia64_get_rr(ifa); 168 169 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | 170 (ifa & ((1UL << ps) - 1)); 171 172 head = (struct thash_data *)ia64_thash(ifa); 173 head->etag = INVALID_TI_TAG; 174 ia64_mf(); 175 head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; 176 head->itir = rr.ps << 2; 177 head->etag = ia64_ttag(ifa); 178 head->gpaddr = gpaddr; 179} 180 181void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) 182{ 183 u64 i, dirty_pages = 1; 184 u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; 185 vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); 186 void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE; 187 188 dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; 189 190 vmm_spin_lock(lock); 191 for (i = 0; i < dirty_pages; i++) { 192 /* avoid RMW */ 193 if (!test_bit(base_gfn + i, dirty_bitmap)) 194 set_bit(base_gfn + i , dirty_bitmap); 195 } 196 vmm_spin_unlock(lock); 197} 198 199void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) 200{ 201 u64 phy_pte, psr; 202 union ia64_rr mrr; 203 204 mrr.val = ia64_get_rr(va); 205 phy_pte = translate_phy_pte(&pte, itir, va); 206 207 if (itir_ps(itir) >= mrr.ps) { 208 vhpt_insert(phy_pte, itir, va, pte); 209 } else { 210 phy_pte &= ~PAGE_FLAGS_RV_MASK; 211 psr = ia64_clear_ic(); 212 ia64_itc(type, va, phy_pte, itir_ps(itir)); 213 paravirt_dv_serialize_data(); 214 ia64_set_psr(psr); 215 } 216 217 if (!(pte&VTLB_PTE_IO)) 218 mark_pages_dirty(v, pte, itir_ps(itir)); 219} 220 221/* 222 * vhpt lookup 223 */ 224struct thash_data *vhpt_lookup(u64 va) 225{ 226 struct thash_data *head; 227 u64 tag; 228 229 head = (struct thash_data *)ia64_thash(va); 230 tag = ia64_ttag(va); 231 if (head->etag == tag) 232 return head; 233 return NULL; 234} 235 236u64 guest_vhpt_lookup(u64 iha, u64 *pte) 237{ 238 u64 ret; 239 struct thash_data *data; 240 241 data = __vtr_lookup(current_vcpu, iha, D_TLB); 242 if (data != NULL) 243 thash_vhpt_insert(current_vcpu, data->page_flags, 244 data->itir, iha, D_TLB); 245 246 asm volatile ("rsm psr.ic|psr.i;;" 247 "srlz.d;;" 248 "ld8.s r9=[%1];;" 249 "tnat.nz p6,p7=r9;;" 250 "(p6) mov %0=1;" 251 "(p6) mov r9=r0;" 252 "(p7) extr.u r9=r9,0,53;;" 253 "(p7) mov %0=r0;" 254 "(p7) st8 [%2]=r9;;" 255 "ssm psr.ic;;" 256 "srlz.d;;" 257 "ssm psr.i;;" 258 "srlz.d;;" 259 : "=r"(ret) : "r"(iha), "r"(pte):"memory"); 260 261 return ret; 262} 263 264/* 265 * purge software guest tlb 266 */ 267 268static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) 269{ 270 struct thash_data *cur; 271 u64 start, curadr, size, psbits, tag, rr_ps, num; 272 union ia64_rr vrr; 273 struct thash_cb *hcb = &v->arch.vtlb; 274 275 vrr.val = vcpu_get_rr(v, va); 276 psbits = VMX(v, psbits[(va >> 61)]); 277 start = va & ~((1UL << ps) - 1); 278 while (psbits) { 279 curadr = start; 280 rr_ps = __ffs(psbits); 281 psbits &= ~(1UL << rr_ps); 282 num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); 283 size = PSIZE(rr_ps); 284 vrr.ps = rr_ps; 285 while (num) { 286 cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); 287 if (cur->etag == tag && cur->ps == rr_ps) 288 cur->etag = INVALID_TI_TAG; 289 curadr += size; 290 num--; 291 } 292 } 293} 294 295 296/* 297 * purge VHPT and machine TLB 298 */ 299static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) 300{ 301 struct thash_data *cur; 302 u64 start, size, tag, num; 303 union ia64_rr rr; 304 305 start = va & ~((1UL << ps) - 1); 306 rr.val = ia64_get_rr(va); 307 size = PSIZE(rr.ps); 308 num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); 309 while (num) { 310 cur = (struct thash_data *)ia64_thash(start); 311 tag = ia64_ttag(start); 312 if (cur->etag == tag) 313 cur->etag = INVALID_TI_TAG; 314 start += size; 315 num--; 316 } 317 machine_tlb_purge(va, ps); 318} 319 320/* 321 * Insert an entry into hash TLB or VHPT. 322 * NOTES: 323 * 1: When inserting VHPT to thash, "va" is a must covered 324 * address by the inserted machine VHPT entry. 325 * 2: The format of entry is always in TLB. 326 * 3: The caller need to make sure the new entry will not overlap 327 * with any existed entry. 328 */ 329void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) 330{ 331 struct thash_data *head; 332 union ia64_rr vrr; 333 u64 tag; 334 struct thash_cb *hcb = &v->arch.vtlb; 335 336 vrr.val = vcpu_get_rr(v, va); 337 vrr.ps = itir_ps(itir); 338 VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); 339 head = vsa_thash(hcb->pta, va, vrr.val, &tag); 340 head->page_flags = pte; 341 head->itir = itir; 342 head->etag = tag; 343} 344 345int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) 346{ 347 struct thash_data *trp; 348 int i; 349 u64 end, rid; 350 351 rid = vcpu_get_rr(vcpu, va); 352 rid = rid & RR_RID_MASK; 353 end = va + PSIZE(ps); 354 if (type == D_TLB) { 355 if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { 356 for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; 357 i < NDTRS; i++, trp++) { 358 if (__is_tr_overlap(trp, rid, va, end)) 359 return i; 360 } 361 } 362 } else { 363 if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { 364 for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; 365 i < NITRS; i++, trp++) { 366 if (__is_tr_overlap(trp, rid, va, end)) 367 return i; 368 } 369 } 370 } 371 return -1; 372} 373 374/* 375 * Purge entries in VTLB and VHPT 376 */ 377void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) 378{ 379 if (vcpu_quick_region_check(v->arch.tc_regions, va)) 380 vtlb_purge(v, va, ps); 381 vhpt_purge(v, va, ps); 382} 383 384void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) 385{ 386 u64 old_va = va; 387 va = REGION_OFFSET(va); 388 if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) 389 vtlb_purge(v, va, ps); 390 vhpt_purge(v, va, ps); 391} 392 393u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) 394{ 395 u64 ps, ps_mask, paddr, maddr, io_mask; 396 union pte_flags phy_pte; 397 398 ps = itir_ps(itir); 399 ps_mask = ~((1UL << ps) - 1); 400 phy_pte.val = *pte; 401 paddr = *pte; 402 paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); 403 maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT); 404 io_mask = maddr & GPFN_IO_MASK; 405 if (io_mask && (io_mask != GPFN_PHYS_MMIO)) { 406 *pte |= VTLB_PTE_IO; 407 return -1; 408 } 409 maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | 410 (paddr & ~PAGE_MASK); 411 phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; 412 return phy_pte.val; 413} 414 415/* 416 * Purge overlap TCs and then insert the new entry to emulate itc ops. 417 * Notes: Only TC entry can purge and insert. 418 */ 419void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, 420 u64 ifa, int type) 421{ 422 u64 ps; 423 u64 phy_pte, io_mask, index; 424 union ia64_rr vrr, mrr; 425 426 ps = itir_ps(itir); 427 vrr.val = vcpu_get_rr(v, ifa); 428 mrr.val = ia64_get_rr(ifa); 429 430 index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; 431 io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK; 432 phy_pte = translate_phy_pte(&pte, itir, ifa); 433 434 /* Ensure WB attribute if pte is related to a normal mem page, 435 * which is required by vga acceleration since qemu maps shared 436 * vram buffer with WB. 437 */ 438 if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) && 439 io_mask != GPFN_PHYS_MMIO) { 440 pte &= ~_PAGE_MA_MASK; 441 phy_pte &= ~_PAGE_MA_MASK; 442 } 443 444 vtlb_purge(v, ifa, ps); 445 vhpt_purge(v, ifa, ps); 446 447 if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) { 448 vtlb_insert(v, pte, itir, ifa); 449 vcpu_quick_region_set(VMX(v, tc_regions), ifa); 450 } 451 if (pte & VTLB_PTE_IO) 452 return; 453 454 if (ps >= mrr.ps) 455 vhpt_insert(phy_pte, itir, ifa, pte); 456 else { 457 u64 psr; 458 phy_pte &= ~PAGE_FLAGS_RV_MASK; 459 psr = ia64_clear_ic(); 460 ia64_itc(type, ifa, phy_pte, ps); 461 paravirt_dv_serialize_data(); 462 ia64_set_psr(psr); 463 } 464 if (!(pte&VTLB_PTE_IO)) 465 mark_pages_dirty(v, pte, ps); 466 467} 468 469/* 470 * Purge all TCs or VHPT entries including those in Hash table. 471 * 472 */ 473 474void thash_purge_all(struct kvm_vcpu *v) 475{ 476 int i; 477 struct thash_data *head; 478 struct thash_cb *vtlb, *vhpt; 479 vtlb = &v->arch.vtlb; 480 vhpt = &v->arch.vhpt; 481 482 for (i = 0; i < 8; i++) 483 VMX(v, psbits[i]) = 0; 484 485 head = vtlb->hash; 486 for (i = 0; i < vtlb->num; i++) { 487 head->page_flags = 0; 488 head->etag = INVALID_TI_TAG; 489 head->itir = 0; 490 head->next = 0; 491 head++; 492 }; 493 494 head = vhpt->hash; 495 for (i = 0; i < vhpt->num; i++) { 496 head->page_flags = 0; 497 head->etag = INVALID_TI_TAG; 498 head->itir = 0; 499 head->next = 0; 500 head++; 501 }; 502 503 local_flush_tlb_all(); 504} 505 506/* 507 * Lookup the hash table and its collision chain to find an entry 508 * covering this address rid:va or the entry. 509 * 510 * INPUT: 511 * in: TLB format for both VHPT & TLB. 512 */ 513struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) 514{ 515 struct thash_data *cch; 516 u64 psbits, ps, tag; 517 union ia64_rr vrr; 518 519 struct thash_cb *hcb = &v->arch.vtlb; 520 521 cch = __vtr_lookup(v, va, is_data); 522 if (cch) 523 return cch; 524 525 if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) 526 return NULL; 527 528 psbits = VMX(v, psbits[(va >> 61)]); 529 vrr.val = vcpu_get_rr(v, va); 530 while (psbits) { 531 ps = __ffs(psbits); 532 psbits &= ~(1UL << ps); 533 vrr.ps = ps; 534 cch = vsa_thash(hcb->pta, va, vrr.val, &tag); 535 if (cch->etag == tag && cch->ps == ps) 536 return cch; 537 } 538 539 return NULL; 540} 541 542/* 543 * Initialize internal control data before service. 544 */ 545void thash_init(struct thash_cb *hcb, u64 sz) 546{ 547 int i; 548 struct thash_data *head; 549 550 hcb->pta.val = (unsigned long)hcb->hash; 551 hcb->pta.vf = 1; 552 hcb->pta.ve = 1; 553 hcb->pta.size = sz; 554 head = hcb->hash; 555 for (i = 0; i < hcb->num; i++) { 556 head->page_flags = 0; 557 head->itir = 0; 558 head->etag = INVALID_TI_TAG; 559 head->next = 0; 560 head++; 561 } 562} 563 564u64 kvm_get_mpt_entry(u64 gpfn) 565{ 566 u64 *base = (u64 *) KVM_P2M_BASE; 567 568 if (gpfn >= (KVM_P2M_SIZE >> 3)) 569 panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn); 570 571 return *(base + gpfn); 572} 573 574u64 kvm_lookup_mpa(u64 gpfn) 575{ 576 u64 maddr; 577 maddr = kvm_get_mpt_entry(gpfn); 578 return maddr&_PAGE_PPN_MASK; 579} 580 581u64 kvm_gpa_to_mpa(u64 gpa) 582{ 583 u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); 584 return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); 585} 586 587/* 588 * Fetch guest bundle code. 589 * INPUT: 590 * gip: guest ip 591 * pbundle: used to return fetched bundle. 592 */ 593int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) 594{ 595 u64 gpip = 0; /* guest physical IP*/ 596 u64 *vpa; 597 struct thash_data *tlb; 598 u64 maddr; 599 600 if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { 601 /* I-side physical mode */ 602 gpip = gip; 603 } else { 604 tlb = vtlb_lookup(vcpu, gip, I_TLB); 605 if (tlb) 606 gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | 607 (gip & (PSIZE(tlb->ps) - 1)); 608 } 609 if (gpip) { 610 maddr = kvm_gpa_to_mpa(gpip); 611 } else { 612 tlb = vhpt_lookup(gip); 613 if (tlb == NULL) { 614 ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); 615 return IA64_FAULT; 616 } 617 maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) 618 | (gip & (PSIZE(tlb->ps) - 1)); 619 } 620 vpa = (u64 *)__kvm_va(maddr); 621 622 pbundle->i64[0] = *vpa++; 623 pbundle->i64[1] = *vpa; 624 625 return IA64_NO_FAULT; 626} 627 628void kvm_init_vhpt(struct kvm_vcpu *v) 629{ 630 v->arch.vhpt.num = VHPT_NUM_ENTRIES; 631 thash_init(&v->arch.vhpt, VHPT_SHIFT); 632 ia64_set_pta(v->arch.vhpt.pta.val); 633 /*Enable VHPT here?*/ 634} 635 636void kvm_init_vtlb(struct kvm_vcpu *v) 637{ 638 v->arch.vtlb.num = VTLB_NUM_ENTRIES; 639 thash_init(&v->arch.vtlb, VTLB_SHIFT); 640} 641