1/* 2 * include/asm-s390/pgtable.h 3 * 4 * S390 64bit version 5 * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation 6 * Author(s): Hartmut Penner (hp@de.ibm.com) 7 * Ulrich Weigand (weigand@de.ibm.com) 8 * Martin Schwidefsky (schwidefsky@de.ibm.com) 9 * 10 * Derived from "include/asm-i386/pgtable.h" 11 */ 12 13#ifndef _ASM_S390_PGTABLE_H 14#define _ASM_S390_PGTABLE_H 15 16/* 17 * The Linux memory management assumes a three-level page table setup. On 18 * the S390, we use that, but "fold" the mid level into the top-level page 19 * table, so that we physically have the same two-level page table as the 20 * S390 mmu expects. 21 * 22 * This file contains the functions and defines necessary to modify and use 23 * the S390 page table tree. 24 */ 25#ifndef __ASSEMBLY__ 26#include <asm/processor.h> 27#include <linux/threads.h> 28 29extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); 30extern void paging_init(void); 31 32/* Caches aren't brain-dead on S390. */ 33#define flush_cache_all() do { } while (0) 34#define flush_cache_mm(mm) do { } while (0) 35#define flush_cache_range(mm, start, end) do { } while (0) 36#define flush_cache_page(vma, vmaddr) do { } while (0) 37#define flush_page_to_ram(page) do { } while (0) 38#define flush_dcache_page(page) do { } while (0) 39#define flush_icache_range(start, end) do { } while (0) 40#define flush_icache_page(vma,pg) do { } while (0) 41#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) 42 43/* 44 * The S390 doesn't have any external MMU info: the kernel page 45 * tables contain all the necessary information. 46 */ 47#define update_mmu_cache(vma, address, pte) do { } while (0) 48 49/* 50 * ZERO_PAGE is a global shared page that is always zero: used 51 * for zero-mapped memory areas etc.. 52 */ 53extern char empty_zero_page[PAGE_SIZE]; 54#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) 55#endif /* !__ASSEMBLY__ */ 56 57/* 58 * PMD_SHIFT determines the size of the area a second-level page 59 * table can map 60 */ 61#define PMD_SHIFT 21 62#define PMD_SIZE (1UL << PMD_SHIFT) 63#define PMD_MASK (~(PMD_SIZE-1)) 64 65/* PGDIR_SHIFT determines what a third-level page table entry can map */ 66#define PGDIR_SHIFT 30 67#define PGDIR_SIZE (1UL << PGDIR_SHIFT) 68#define PGDIR_MASK (~(PGDIR_SIZE-1)) 69 70/* 71 * entries per page directory level: the S390 is two to five-level, 72 * currently we use a 3 level lookup 73 */ 74#define PTRS_PER_PTE 512 75#define PTRS_PER_PMD 512 76#define PTRS_PER_PGD 2048 77 78/* 79 * pgd entries used up by user/kernel: 80 */ 81#define USER_PTRS_PER_PGD 2048 82#define USER_PGD_PTRS 2048 83#define KERNEL_PGD_PTRS 2048 84#define FIRST_USER_PGD_NR 0 85 86#define pte_ERROR(e) \ 87 printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) 88#define pmd_ERROR(e) \ 89 printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) 90#define pgd_ERROR(e) \ 91 printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) 92 93#ifndef __ASSEMBLY__ 94/* 95 * Just any arbitrary offset to the start of the vmalloc VM area: the 96 * current 8MB value just means that there will be a 8MB "hole" after the 97 * physical memory until the kernel virtual memory starts. That means that 98 * any out-of-bounds memory accesses will hopefully be caught. 99 * The vmalloc() routines leaves a hole of 4kB between each vmalloced 100 * area for the same reason. ;) 101 */ 102#define VMALLOC_OFFSET (8*1024*1024) 103#define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \ 104 & ~(VMALLOC_OFFSET-1)) 105#define VMALLOC_VMADDR(x) ((unsigned long)(x)) 106#define VMALLOC_END (0x20000000000L) 107 108 109/* 110 * A pagetable entry of S390 has following format: 111 * | PFRA |0IP0| OS | 112 * 0000000000111111111122222222223333333333444444444455555555556666 113 * 0123456789012345678901234567890123456789012345678901234567890123 114 * 115 * I Page-Invalid Bit: Page is not available for address-translation 116 * P Page-Protection Bit: Store access not possible for page 117 * 118 * A segmenttable entry of S390 has following format: 119 * | P-table origin | TT 120 * 0000000000111111111122222222223333333333444444444455555555556666 121 * 0123456789012345678901234567890123456789012345678901234567890123 122 * 123 * I Segment-Invalid Bit: Segment is not available for address-translation 124 * C Common-Segment Bit: Segment is not private (PoP 3-30) 125 * P Page-Protection Bit: Store access not possible for page 126 * TT Type 00 127 * 128 * A region table entry of S390 has following format: 129 * | S-table origin | TF TTTL 130 * 0000000000111111111122222222223333333333444444444455555555556666 131 * 0123456789012345678901234567890123456789012345678901234567890123 132 * 133 * I Segment-Invalid Bit: Segment is not available for address-translation 134 * TT Type 01 135 * TF 136 * TL Table lenght 137 * 138 * The regiontable origin of S390 has following format: 139 * | region table origon | DTTL 140 * 0000000000111111111122222222223333333333444444444455555555556666 141 * 0123456789012345678901234567890123456789012345678901234567890123 142 * 143 * X Space-Switch event: 144 * G Segment-Invalid Bit: 145 * P Private-Space Bit: 146 * S Storage-Alteration: 147 * R Real space 148 * TL Table-Length: 149 * 150 * A storage key has the following format: 151 * | ACC |F|R|C|0| 152 * 0 3 4 5 6 7 153 * ACC: access key 154 * F : fetch protection bit 155 * R : referenced bit 156 * C : changed bit 157 */ 158 159/* Bits in the page table entry */ 160#define _PAGE_PRESENT 0x001 /* Software */ 161#define _PAGE_MKCLEAN 0x002 /* Software */ 162#define _PAGE_ISCLEAN 0x004 /* Software */ 163#define _PAGE_RO 0x200 /* HW read-only */ 164#define _PAGE_INVALID 0x400 /* HW invalid */ 165 166/* Bits in the segment table entry */ 167#define _PMD_ENTRY_INV 0x20 /* invalid segment table entry */ 168#define _PMD_ENTRY 0x00 169 170/* Bits in the region third table entry */ 171#define _PGD_ENTRY_INV 0x20 /* region table entry invalid bit */ 172#define _PGD_ENTRY_MASK 0x04 /* region third table entry mask */ 173#define _PGD_ENTRY_LEN(x) ((x)&3) /* region table length bits */ 174#define _PGD_ENTRY_OFF(x) (((x)&3)<<6) /* region table offset bits */ 175 176/* 177 * User and kernel page directory 178 */ 179#define _REGION_THIRD 0x4 180#define _REGION_THIRD_LEN 0x1 181#define _REGION_TABLE (_REGION_THIRD|_REGION_THIRD_LEN|0x40|0x100) 182#define _KERN_REGION_TABLE (_REGION_THIRD|_REGION_THIRD_LEN) 183 184/* Bits in the storage key */ 185#define _PAGE_CHANGED 0x02 /* HW changed bit */ 186#define _PAGE_REFERENCED 0x04 /* HW referenced bit */ 187 188/* 189 * No mapping available 190 */ 191#define PAGE_INVALID __pgprot(_PAGE_INVALID) 192#define PAGE_NONE_SHARED __pgprot(_PAGE_PRESENT|_PAGE_INVALID) 193#define PAGE_NONE_PRIVATE __pgprot(_PAGE_PRESENT|_PAGE_INVALID|_PAGE_ISCLEAN) 194#define PAGE_RO_SHARED __pgprot(_PAGE_PRESENT|_PAGE_RO) 195#define PAGE_RO_PRIVATE __pgprot(_PAGE_PRESENT|_PAGE_RO|_PAGE_ISCLEAN) 196#define PAGE_COPY __pgprot(_PAGE_PRESENT|_PAGE_RO|_PAGE_ISCLEAN) 197#define PAGE_SHARED __pgprot(_PAGE_PRESENT) 198#define PAGE_KERNEL __pgprot(_PAGE_PRESENT) 199 200/* 201 * The S390 can't do page protection for execute, and considers that the 202 * same are read. Also, write permissions imply read permissions. This is 203 * the closest we can get.. 204 */ 205#define __P000 PAGE_NONE_PRIVATE 206#define __P001 PAGE_RO_PRIVATE 207#define __P010 PAGE_COPY 208#define __P011 PAGE_COPY 209#define __P100 PAGE_RO_PRIVATE 210#define __P101 PAGE_RO_PRIVATE 211#define __P110 PAGE_COPY 212#define __P111 PAGE_COPY 213 214#define __S000 PAGE_NONE_SHARED 215#define __S001 PAGE_RO_SHARED 216#define __S010 PAGE_SHARED 217#define __S011 PAGE_SHARED 218#define __S100 PAGE_RO_SHARED 219#define __S101 PAGE_RO_SHARED 220#define __S110 PAGE_SHARED 221#define __S111 PAGE_SHARED 222 223/* 224 * Certain architectures need to do special things when PTEs 225 * within a page table are directly modified. Thus, the following 226 * hook is made available. 227 */ 228extern inline void set_pte(pte_t *pteptr, pte_t pteval) 229{ 230 if ((pte_val(pteval) & (_PAGE_MKCLEAN|_PAGE_INVALID)) 231 == _PAGE_MKCLEAN) 232 { 233 pte_val(pteval) &= ~_PAGE_MKCLEAN; 234 235 asm volatile ("sske %0,%1" 236 : : "d" (0), "a" (pte_val(pteval))); 237 } 238 239 *pteptr = pteval; 240} 241 242#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) 243 244/* 245 * pgd/pmd/pte query functions 246 */ 247extern inline int __pgd_present(pgd_t *pgd) 248{ 249 unsigned long addr = (unsigned long) pgd; 250 unsigned long *pgd_slot = (unsigned long *) (addr & -8); 251 unsigned long offset = (addr & 4) >> 1; 252 253 if (*pgd_slot & _PGD_ENTRY_INV) 254 return 0; 255 if ((*pgd_slot & _PGD_ENTRY_OFF(3)) > _PGD_ENTRY_OFF(offset)) 256 return 0; 257 if ((*pgd_slot & _PGD_ENTRY_LEN(3)) < _PGD_ENTRY_LEN(offset)) 258 return 0; 259 return 1; 260} 261#define pgd_present(pgd) __pgd_present(&(pgd)) 262 263extern inline int __pgd_none(pgd_t *pgd) 264{ 265 return !__pgd_present(pgd); 266} 267#define pgd_none(pgd) __pgd_none(&(pgd)) 268 269extern inline int __pgd_bad(pgd_t *pgd) 270{ 271 unsigned long addr = (unsigned long) pgd; 272 unsigned long *pgd_slot = (unsigned long *) (addr & -8); 273 274 return (*pgd_slot & (~PAGE_MASK & ~_PGD_ENTRY_INV & ~_PGD_ENTRY_MASK & 275 ~_PGD_ENTRY_LEN(3) & ~_PGD_ENTRY_OFF(3))) != 0; 276} 277#define pgd_bad(pgd) __pgd_bad(&(pgd)) 278 279extern inline int pmd_present(pmd_t pmd) 280{ 281 return (pmd_val(pmd) & ~PAGE_MASK) == _PMD_ENTRY; 282} 283 284extern inline int pmd_none(pmd_t pmd) 285{ 286 return pmd_val(pmd) & _PMD_ENTRY_INV; 287} 288 289extern inline int pmd_bad(pmd_t pmd) 290{ 291 return (pmd_val(pmd) & (~PAGE_MASK & ~_PMD_ENTRY_INV)) != _PMD_ENTRY; 292} 293 294extern inline int pte_present(pte_t pte) 295{ 296 return pte_val(pte) & _PAGE_PRESENT; 297} 298 299extern inline int pte_none(pte_t pte) 300{ 301 return ((pte_val(pte) & 302 (_PAGE_INVALID | _PAGE_RO | _PAGE_PRESENT)) == _PAGE_INVALID); 303} 304 305#define pte_same(a,b) (pte_val(a) == pte_val(b)) 306 307/* 308 * query functions pte_write/pte_dirty/pte_young only work if 309 * pte_present() is true. Undefined behaviour if not.. 310 */ 311extern inline int pte_write(pte_t pte) 312{ 313 return (pte_val(pte) & _PAGE_RO) == 0; 314} 315 316extern inline int pte_dirty(pte_t pte) 317{ 318 int skey; 319 320 if (pte_val(pte) & _PAGE_ISCLEAN) 321 return 0; 322 asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte))); 323 return skey & _PAGE_CHANGED; 324} 325 326extern inline int pte_young(pte_t pte) 327{ 328 int skey; 329 330 asm volatile ("iske %0,%1" : "=d" (skey) : "a" (pte_val(pte))); 331 return skey & _PAGE_REFERENCED; 332} 333 334/* 335 * pgd/pmd/pte modification functions 336 */ 337extern inline void pgd_clear(pgd_t * pgdp) 338{ 339 unsigned long addr = (unsigned long) pgdp; 340 unsigned long *pgd_slot = (unsigned long *) (addr & -8); 341 unsigned long offset = addr & 4; 342 343 if (*pgd_slot & _PGD_ENTRY_INV) { 344 *pgd_slot = _PGD_ENTRY_INV; 345 return; 346 } 347 if (offset == 0 && (*pgd_slot & _PGD_ENTRY_LEN(2)) != 0) { 348 /* Clear lower pmd, upper pmd still used. */ 349 *pgd_slot = (*pgd_slot & PAGE_MASK) | _PGD_ENTRY_MASK | 350 _PGD_ENTRY_OFF(2) | _PGD_ENTRY_LEN(3); 351 return; 352 } 353 if (offset == 4 && (*pgd_slot & _PGD_ENTRY_OFF(2)) == 0) { 354 /* Clear upped pmd, lower pmd still used. */ 355 *pgd_slot = (*pgd_slot & PAGE_MASK) | _PGD_ENTRY_MASK | 356 _PGD_ENTRY_OFF(0) | _PGD_ENTRY_LEN(1); 357 return; 358 } 359 *pgd_slot = _PGD_ENTRY_INV; 360} 361 362extern inline void pmd_clear(pmd_t * pmdp) 363{ 364 pmd_val(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY; 365 pmd_val1(*pmdp) = _PMD_ENTRY_INV | _PMD_ENTRY; 366} 367 368extern inline void pte_clear(pte_t *ptep) 369{ 370 pte_val(*ptep) = _PAGE_INVALID; 371} 372 373#define PTE_INIT(x) pte_clear(x) 374 375/* 376 * The following pte_modification functions only work if 377 * pte_present() is true. Undefined behaviour if not.. 378 */ 379extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 380{ 381 pte_val(pte) &= PAGE_MASK | _PAGE_ISCLEAN; 382 pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_ISCLEAN; 383 return pte; 384} 385 386extern inline pte_t pte_wrprotect(pte_t pte) 387{ 388 pte_val(pte) |= _PAGE_RO; 389 return pte; 390} 391 392extern inline pte_t pte_mkwrite(pte_t pte) 393{ 394 pte_val(pte) &= ~_PAGE_RO; 395 return pte; 396} 397 398extern inline pte_t pte_mkclean(pte_t pte) 399{ 400 /* The only user of pte_mkclean is the fork() code. 401 We must *not* clear the *physical* page dirty bit 402 just because fork() wants to clear the dirty bit in 403 *one* of the page's mappings. So we just do nothing. */ 404 return pte; 405} 406 407extern inline pte_t pte_mkdirty(pte_t pte) 408{ 409 /* We do not explicitly set the dirty bit because the 410 * sske instruction is slow. It is faster to let the 411 * next instruction set the dirty bit. 412 */ 413 pte_val(pte) &= ~(_PAGE_MKCLEAN | _PAGE_ISCLEAN); 414 return pte; 415} 416 417extern inline pte_t pte_mkold(pte_t pte) 418{ 419 asm volatile ("rrbe 0,%0" : : "a" (pte_val(pte)) : "cc" ); 420 return pte; 421} 422 423extern inline pte_t pte_mkyoung(pte_t pte) 424{ 425 /* To set the referenced bit we read the first word from the real 426 * page with a special instruction: load using real address (lura). 427 * Isn't S/390 a nice architecture ?! */ 428 asm volatile ("lura 0,%0" : : "a" (pte_val(pte) & PAGE_MASK) : "0" ); 429 return pte; 430} 431 432static inline int ptep_test_and_clear_young(pte_t *ptep) 433{ 434 int ccode; 435 436 asm volatile ("rrbe 0,%1\n\t" 437 "ipm %0\n\t" 438 "srl %0,28\n\t" 439 : "=d" (ccode) : "a" (pte_val(*ptep)) : "cc" ); 440 return ccode & 2; 441} 442 443static inline int ptep_test_and_clear_dirty(pte_t *ptep) 444{ 445 int skey; 446 447 if (pte_val(*ptep) & _PAGE_ISCLEAN) 448 return 0; 449 asm volatile ("iske %0,%1" : "=d" (skey) : "a" (*ptep)); 450 if ((skey & _PAGE_CHANGED) == 0) 451 return 0; 452 /* We can't clear the changed bit atomically. For now we 453 * clear (!) the page referenced bit. */ 454 asm volatile ("sske %0,%1" 455 : : "d" (0), "a" (*ptep)); 456 return 1; 457} 458 459static inline pte_t ptep_get_and_clear(pte_t *ptep) 460{ 461 pte_t pte = *ptep; 462 pte_clear(ptep); 463 return pte; 464} 465 466static inline void ptep_set_wrprotect(pte_t *ptep) 467{ 468 pte_t old_pte = *ptep; 469 set_pte(ptep, pte_wrprotect(old_pte)); 470} 471 472static inline void ptep_mkdirty(pte_t *ptep) 473{ 474 pte_mkdirty(*ptep); 475} 476 477/* 478 * Conversion functions: convert a page and protection to a page entry, 479 * and a page entry and page directory to the page they refer to. 480 */ 481extern inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) 482{ 483 pte_t __pte; 484 pte_val(__pte) = physpage + pgprot_val(pgprot); 485 return __pte; 486} 487 488#define mk_pte(pg, pgprot) \ 489({ \ 490 struct page *__page = (pg); \ 491 pgprot_t __pgprot = (pgprot); \ 492 unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT); \ 493 pte_t __pte = mk_pte_phys(__physpage, __pgprot); \ 494 \ 495 if (!(pgprot_val(__pgprot) & _PAGE_ISCLEAN)) { \ 496 int __users = !!__page->buffers + !!__page->mapping; \ 497 if (__users + page_count(__page) == 1) \ 498 pte_val(__pte) |= _PAGE_MKCLEAN; \ 499 } \ 500 __pte; \ 501}) 502 503#define pte_page(x) (mem_map+(unsigned long)((pte_val(x) >> PAGE_SHIFT))) 504 505#define pmd_page(pmd) \ 506 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) 507 508/* to find an entry in a page-table-directory */ 509#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) 510#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address)) 511 512#define pgd_page(pgd) \ 513 ((unsigned long) __va(__pgd_val(pgd) & PAGE_MASK)) 514 515/* to find an entry in a kernel page-table-directory */ 516#define pgd_offset_k(address) pgd_offset(&init_mm, address) 517 518/* Find an entry in the second-level page table.. */ 519#define pmd_offset(dir,addr) \ 520 ((pmd_t *) pgd_page(dir) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) 521 522/* Find an entry in the third-level page table.. */ 523#define pte_offset(dir,addr) \ 524 ((pte_t *) pmd_page(*(dir)) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) 525 526/* 527 * A page-table entry has some bits we have to treat in a special way. 528 * Bits 52 and bit 55 have to be zero, otherwise an specification 529 * exception will occur instead of a page translation exception. The 530 * specifiation exception has the bad habit not to store necessary 531 * information in the lowcore. 532 * Bit 53 and bit 54 are the page invalid bit and the page protection 533 * bit. We set both to indicate a swapped page. 534 * Bit 63 is used as the software page present bit. If a page is 535 * swapped this obviously has to be zero. 536 * This leaves the bits 0-51 and bits 56-62 to store type and offset. 537 * We use the 7 bits from 56-62 for the type and the 52 bits from 0-51 538 * for the offset. 539 * | offset |0110|type |0 540 * 0000000000111111111122222222223333333333444444444455555555556666 541 * 0123456789012345678901234567890123456789012345678901234567890123 542 */ 543extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) 544{ 545 pte_t pte; 546 pte_val(pte) = (type << 1) | (offset << 12) | _PAGE_INVALID | _PAGE_RO; 547 pte_val(pte) &= 0xfffffffffffff6fe; /* better to be paranoid */ 548 return pte; 549} 550 551#define SWP_TYPE(entry) (((entry).val >> 1) & 0x3f) 552#define SWP_OFFSET(entry) ((entry).val >> 12) 553#define SWP_ENTRY(type,offset) ((swp_entry_t) { pte_val(mk_swap_pte((type),(offset))) }) 554 555#define pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 556#define swp_entry_to_pte(x) ((pte_t) { (x).val }) 557 558#endif /* !__ASSEMBLY__ */ 559 560/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ 561#define PageSkip(page) (0) 562#define kern_addr_valid(addr) (1) 563 564/* 565 * No page table caches to initialise 566 */ 567#define pgtable_cache_init() do { } while (0) 568 569#endif /* _S390_PAGE_H */ 570 571