1/* 2 * This file contains the routines for TLB flushing. 3 * On machines where the MMU does not use a hash table to store virtual to 4 * physical translations (ie, SW loaded TLBs or Book3E compilant processors, 5 * this does -not- include 603 however which shares the implementation with 6 * hash based processors) 7 * 8 * -- BenH 9 * 10 * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org> 11 * IBM Corp. 12 * 13 * Derived from arch/ppc/mm/init.c: 14 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) 15 * 16 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) 17 * and Cort Dougan (PReP) (cort@cs.nmt.edu) 18 * Copyright (C) 1996 Paul Mackerras 19 * 20 * Derived from "arch/i386/mm/init.c" 21 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds 22 * 23 * This program is free software; you can redistribute it and/or 24 * modify it under the terms of the GNU General Public License 25 * as published by the Free Software Foundation; either version 26 * 2 of the License, or (at your option) any later version. 27 * 28 */ 29 30#include <linux/kernel.h> 31#include <linux/mm.h> 32#include <linux/init.h> 33#include <linux/highmem.h> 34#include <linux/pagemap.h> 35#include <linux/preempt.h> 36#include <linux/spinlock.h> 37#include <linux/memblock.h> 38 39#include <asm/tlbflush.h> 40#include <asm/tlb.h> 41#include <asm/code-patching.h> 42 43#include "mmu_decl.h" 44 45#ifdef CONFIG_PPC_BOOK3E 46struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { 47 [MMU_PAGE_4K] = { 48 .shift = 12, 49 .ind = 20, 50 .enc = BOOK3E_PAGESZ_4K, 51 }, 52 [MMU_PAGE_16K] = { 53 .shift = 14, 54 .enc = BOOK3E_PAGESZ_16K, 55 }, 56 [MMU_PAGE_64K] = { 57 .shift = 16, 58 .ind = 28, 59 .enc = BOOK3E_PAGESZ_64K, 60 }, 61 [MMU_PAGE_1M] = { 62 .shift = 20, 63 .enc = BOOK3E_PAGESZ_1M, 64 }, 65 [MMU_PAGE_16M] = { 66 .shift = 24, 67 .ind = 36, 68 .enc = BOOK3E_PAGESZ_16M, 69 }, 70 [MMU_PAGE_256M] = { 71 .shift = 28, 72 .enc = BOOK3E_PAGESZ_256M, 73 }, 74 [MMU_PAGE_1G] = { 75 .shift = 30, 76 .enc = BOOK3E_PAGESZ_1GB, 77 }, 78}; 79static inline int mmu_get_tsize(int psize) 80{ 81 return mmu_psize_defs[psize].enc; 82} 83#else 84static inline int mmu_get_tsize(int psize) 85{ 86 /* This isn't used on !Book3E for now */ 87 return 0; 88} 89#endif 90 91/* The variables below are currently only used on 64-bit Book3E 92 * though this will probably be made common with other nohash 93 * implementations at some point 94 */ 95#ifdef CONFIG_PPC64 96 97int mmu_linear_psize; /* Page size used for the linear mapping */ 98int mmu_pte_psize; /* Page size used for PTE pages */ 99int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ 100int book3e_htw_enabled; /* Is HW tablewalk enabled ? */ 101unsigned long linear_map_top; /* Top of linear mapping */ 102 103#endif /* CONFIG_PPC64 */ 104 105/* 106 * Base TLB flushing operations: 107 * 108 * - flush_tlb_mm(mm) flushes the specified mm context TLB's 109 * - flush_tlb_page(vma, vmaddr) flushes one page 110 * - flush_tlb_range(vma, start, end) flushes a range of pages 111 * - flush_tlb_kernel_range(start, end) flushes kernel pages 112 * 113 * - local_* variants of page and mm only apply to the current 114 * processor 115 */ 116 117/* 118 * These are the base non-SMP variants of page and mm flushing 119 */ 120void local_flush_tlb_mm(struct mm_struct *mm) 121{ 122 unsigned int pid; 123 124 preempt_disable(); 125 pid = mm->context.id; 126 if (pid != MMU_NO_CONTEXT) 127 _tlbil_pid(pid); 128 preempt_enable(); 129} 130EXPORT_SYMBOL(local_flush_tlb_mm); 131 132void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, 133 int tsize, int ind) 134{ 135 unsigned int pid; 136 137 preempt_disable(); 138 pid = mm ? mm->context.id : 0; 139 if (pid != MMU_NO_CONTEXT) 140 _tlbil_va(vmaddr, pid, tsize, ind); 141 preempt_enable(); 142} 143 144void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 145{ 146 __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, 147 mmu_get_tsize(mmu_virtual_psize), 0); 148} 149EXPORT_SYMBOL(local_flush_tlb_page); 150 151/* 152 * And here are the SMP non-local implementations 153 */ 154#ifdef CONFIG_SMP 155 156static DEFINE_RAW_SPINLOCK(tlbivax_lock); 157 158static int mm_is_core_local(struct mm_struct *mm) 159{ 160 return cpumask_subset(mm_cpumask(mm), 161 topology_thread_cpumask(smp_processor_id())); 162} 163 164struct tlb_flush_param { 165 unsigned long addr; 166 unsigned int pid; 167 unsigned int tsize; 168 unsigned int ind; 169}; 170 171static void do_flush_tlb_mm_ipi(void *param) 172{ 173 struct tlb_flush_param *p = param; 174 175 _tlbil_pid(p ? p->pid : 0); 176} 177 178static void do_flush_tlb_page_ipi(void *param) 179{ 180 struct tlb_flush_param *p = param; 181 182 _tlbil_va(p->addr, p->pid, p->tsize, p->ind); 183} 184 185 186/* Note on invalidations and PID: 187 * 188 * We snapshot the PID with preempt disabled. At this point, it can still 189 * change either because: 190 * - our context is being stolen (PID -> NO_CONTEXT) on another CPU 191 * - we are invaliating some target that isn't currently running here 192 * and is concurrently acquiring a new PID on another CPU 193 * - some other CPU is re-acquiring a lost PID for this mm 194 * etc... 195 * 196 * However, this shouldn't be a problem as we only guarantee 197 * invalidation of TLB entries present prior to this call, so we 198 * don't care about the PID changing, and invalidating a stale PID 199 * is generally harmless. 200 */ 201 202void flush_tlb_mm(struct mm_struct *mm) 203{ 204 unsigned int pid; 205 206 preempt_disable(); 207 pid = mm->context.id; 208 if (unlikely(pid == MMU_NO_CONTEXT)) 209 goto no_context; 210 if (!mm_is_core_local(mm)) { 211 struct tlb_flush_param p = { .pid = pid }; 212 /* Ignores smp_processor_id() even if set. */ 213 smp_call_function_many(mm_cpumask(mm), 214 do_flush_tlb_mm_ipi, &p, 1); 215 } 216 _tlbil_pid(pid); 217 no_context: 218 preempt_enable(); 219} 220EXPORT_SYMBOL(flush_tlb_mm); 221 222void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, 223 int tsize, int ind) 224{ 225 struct cpumask *cpu_mask; 226 unsigned int pid; 227 228 preempt_disable(); 229 pid = mm ? mm->context.id : 0; 230 if (unlikely(pid == MMU_NO_CONTEXT)) 231 goto bail; 232 cpu_mask = mm_cpumask(mm); 233 if (!mm_is_core_local(mm)) { 234 /* If broadcast tlbivax is supported, use it */ 235 if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { 236 int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); 237 if (lock) 238 raw_spin_lock(&tlbivax_lock); 239 _tlbivax_bcast(vmaddr, pid, tsize, ind); 240 if (lock) 241 raw_spin_unlock(&tlbivax_lock); 242 goto bail; 243 } else { 244 struct tlb_flush_param p = { 245 .pid = pid, 246 .addr = vmaddr, 247 .tsize = tsize, 248 .ind = ind, 249 }; 250 /* Ignores smp_processor_id() even if set in cpu_mask */ 251 smp_call_function_many(cpu_mask, 252 do_flush_tlb_page_ipi, &p, 1); 253 } 254 } 255 _tlbil_va(vmaddr, pid, tsize, ind); 256 bail: 257 preempt_enable(); 258} 259 260void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) 261{ 262 __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, 263 mmu_get_tsize(mmu_virtual_psize), 0); 264} 265EXPORT_SYMBOL(flush_tlb_page); 266 267#endif /* CONFIG_SMP */ 268 269/* 270 * Flush kernel TLB entries in the given range 271 */ 272void flush_tlb_kernel_range(unsigned long start, unsigned long end) 273{ 274#ifdef CONFIG_SMP 275 preempt_disable(); 276 smp_call_function(do_flush_tlb_mm_ipi, NULL, 1); 277 _tlbil_pid(0); 278 preempt_enable(); 279#else 280 _tlbil_pid(0); 281#endif 282} 283EXPORT_SYMBOL(flush_tlb_kernel_range); 284 285/* 286 * Currently, for range flushing, we just do a full mm flush. This should 287 * be optimized based on a threshold on the size of the range, since 288 * some implementation can stack multiple tlbivax before a tlbsync but 289 * for now, we keep it that way 290 */ 291void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 292 unsigned long end) 293 294{ 295 flush_tlb_mm(vma->vm_mm); 296} 297EXPORT_SYMBOL(flush_tlb_range); 298 299void tlb_flush(struct mmu_gather *tlb) 300{ 301 flush_tlb_mm(tlb->mm); 302 303 /* Push out batch of freed page tables */ 304 pte_free_finish(); 305} 306 307/* 308 * Below are functions specific to the 64-bit variant of Book3E though that 309 * may change in the future 310 */ 311 312#ifdef CONFIG_PPC64 313 314/* 315 * Handling of virtual linear page tables or indirect TLB entries 316 * flushing when PTE pages are freed 317 */ 318void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) 319{ 320 int tsize = mmu_psize_defs[mmu_pte_psize].enc; 321 322 if (book3e_htw_enabled) { 323 unsigned long start = address & PMD_MASK; 324 unsigned long end = address + PMD_SIZE; 325 unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; 326 327 /* This isn't the most optimal, ideally we would factor out the 328 * while preempt & CPU mask mucking around, or even the IPI but 329 * it will do for now 330 */ 331 while (start < end) { 332 __flush_tlb_page(tlb->mm, start, tsize, 1); 333 start += size; 334 } 335 } else { 336 unsigned long rmask = 0xf000000000000000ul; 337 unsigned long rid = (address & rmask) | 0x1000000000000000ul; 338 unsigned long vpte = address & ~rmask; 339 340#ifdef CONFIG_PPC_64K_PAGES 341 vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; 342#else 343 vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; 344#endif 345 vpte |= rid; 346 __flush_tlb_page(tlb->mm, vpte, tsize, 0); 347 } 348} 349 350static void setup_page_sizes(void) 351{ 352 unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); 353 unsigned int tlb0ps = mfspr(SPRN_TLB0PS); 354 unsigned int eptcfg = mfspr(SPRN_EPTCFG); 355 int i, psize; 356 357 /* Look for supported direct sizes */ 358 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 359 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 360 361 if (tlb0ps & (1U << (def->shift - 10))) 362 def->flags |= MMU_PAGE_SIZE_DIRECT; 363 } 364 365 /* Indirect page sizes supported ? */ 366 if ((tlb0cfg & TLBnCFG_IND) == 0) 367 goto no_indirect; 368 369 /* Now, we only deal with one IND page size for each 370 * direct size. Hopefully all implementations today are 371 * unambiguous, but we might want to be careful in the 372 * future. 373 */ 374 for (i = 0; i < 3; i++) { 375 unsigned int ps, sps; 376 377 sps = eptcfg & 0x1f; 378 eptcfg >>= 5; 379 ps = eptcfg & 0x1f; 380 eptcfg >>= 5; 381 if (!ps || !sps) 382 continue; 383 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { 384 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 385 386 if (ps == (def->shift - 10)) 387 def->flags |= MMU_PAGE_SIZE_INDIRECT; 388 if (sps == (def->shift - 10)) 389 def->ind = ps + 10; 390 } 391 } 392 no_indirect: 393 394 /* Cleanup array and print summary */ 395 pr_info("MMU: Supported page sizes\n"); 396 for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { 397 struct mmu_psize_def *def = &mmu_psize_defs[psize]; 398 const char *__page_type_names[] = { 399 "unsupported", 400 "direct", 401 "indirect", 402 "direct & indirect" 403 }; 404 if (def->flags == 0) { 405 def->shift = 0; 406 continue; 407 } 408 pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), 409 __page_type_names[def->flags & 0x3]); 410 } 411} 412 413static void setup_mmu_htw(void) 414{ 415 extern unsigned int interrupt_base_book3e; 416 extern unsigned int exc_data_tlb_miss_htw_book3e; 417 extern unsigned int exc_instruction_tlb_miss_htw_book3e; 418 419 unsigned int *ibase = &interrupt_base_book3e; 420 421 /* Check if HW tablewalk is present, and if yes, enable it by: 422 * 423 * - patching the TLB miss handlers to branch to the 424 * one dedicates to it 425 * 426 * - setting the global book3e_htw_enabled 427 */ 428 unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); 429 430 if ((tlb0cfg & TLBnCFG_IND) && 431 (tlb0cfg & TLBnCFG_PT)) { 432 /* Our exceptions vectors start with a NOP and -then- a branch 433 * to deal with single stepping from userspace which stops on 434 * the second instruction. Thus we need to patch the second 435 * instruction of the exception, not the first one 436 */ 437 patch_branch(ibase + (0x1c0 / 4) + 1, 438 (unsigned long)&exc_data_tlb_miss_htw_book3e, 0); 439 patch_branch(ibase + (0x1e0 / 4) + 1, 440 (unsigned long)&exc_instruction_tlb_miss_htw_book3e, 0); 441 book3e_htw_enabled = 1; 442 } 443 pr_info("MMU: Book3E Page Tables %s\n", 444 book3e_htw_enabled ? "Enabled" : "Disabled"); 445} 446 447/* 448 * Early initialization of the MMU TLB code 449 */ 450static void __early_init_mmu(int boot_cpu) 451{ 452 unsigned int mas4; 453 454 mmu_linear_psize = MMU_PAGE_1G; 455 456 mmu_vmemmap_psize = MMU_PAGE_16M; 457 458 if (boot_cpu) { 459 /* Look for supported page sizes */ 460 setup_page_sizes(); 461 462 /* Look for HW tablewalk support */ 463 setup_mmu_htw(); 464 } 465 466 /* Set MAS4 based on page table setting */ 467 468 mas4 = 0x4 << MAS4_WIMGED_SHIFT; 469 if (book3e_htw_enabled) { 470 mas4 |= mas4 | MAS4_INDD; 471#ifdef CONFIG_PPC_64K_PAGES 472 mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; 473 mmu_pte_psize = MMU_PAGE_256M; 474#else 475 mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; 476 mmu_pte_psize = MMU_PAGE_1M; 477#endif 478 } else { 479#ifdef CONFIG_PPC_64K_PAGES 480 mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; 481#else 482 mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; 483#endif 484 mmu_pte_psize = mmu_virtual_psize; 485 } 486 mtspr(SPRN_MAS4, mas4); 487 488 /* Set the global containing the top of the linear mapping 489 * for use by the TLB miss code 490 */ 491 linear_map_top = memblock_end_of_DRAM(); 492 493 /* A sync won't hurt us after mucking around with 494 * the MMU configuration 495 */ 496 mb(); 497} 498 499void __init early_init_mmu(void) 500{ 501 __early_init_mmu(1); 502} 503 504void __cpuinit early_init_mmu_secondary(void) 505{ 506 __early_init_mmu(0); 507} 508 509#endif /* CONFIG_PPC64 */ 510