/*
 * address space "slices" (meta-segments) support
 *
 * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
 *
 * Based on hugetlb implementation
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */

#undef DEBUG

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/mman.h>
#include <asm/mmu.h>
#include <asm/spu.h>

static DEFINE_SPINLOCK(slice_convert_lock);


#ifdef DEBUG
int _slice_debug = 1;

static void slice_print_mask(const char *label, struct slice_mask mask)
{
	char *p, buf[16 + 3 + 16 + 1];
	int i;

	if (!_slice_debug)
		return;
	p = buf;
	for (i = 0; i < SLICE_NUM_LOW; i++)
		*(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
	*(p++) = ' ';
	*(p++) = '-';
	*(p++) = ' ';
	for (i = 0; i < SLICE_NUM_HIGH; i++)
		*(p++) = (mask.high_slices & (1 << i)) ? '1' : '0';
	*(p++) = 0;

	printk(KERN_DEBUG "%s:%s\n", label, buf);
}

#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)

#else

static void slice_print_mask(const char *label, struct slice_mask mask) {}
#define slice_dbg(fmt...)

#endif
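/*
 * A quick worked example of the mask arithmetic used by
 * slice_range_to_mask() below, assuming the initial ppc64 geometry
 * (SLICE_LOW_SHIFT == 28, SLICE_HIGH_SHIFT == 40, i.e. 16 x 256MB low
 * slices below 4GB and 16 x 1TB high slices):
 *
 *	start = 0x30000000, len = 0x20000000	(768MB..1280MB)
 *	GET_LOW_SLICE_INDEX(start) = 3
 *	GET_LOW_SLICE_INDEX(end)   = 4		(end = start + len - 1)
 *	low_slices = (1u << 5) - (1u << 3) = 0x18	(slices 3 and 4)
 *
 * The "(1 << (last + 1)) - (1 << first)" trick sets every bit from
 * first to last inclusive in a single expression.
 */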
static struct slice_mask slice_range_to_mask(unsigned long start,
					     unsigned long len)
{
	unsigned long end = start + len - 1;
	struct slice_mask ret = { 0, 0 };

	if (start < SLICE_LOW_TOP) {
		unsigned long mend = min(end, SLICE_LOW_TOP);
		unsigned long mstart = min(start, SLICE_LOW_TOP);

		ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
			- (1u << GET_LOW_SLICE_INDEX(mstart));
	}

	if ((start + len) > SLICE_LOW_TOP)
		ret.high_slices = (1u << (GET_HIGH_SLICE_INDEX(end) + 1))
			- (1u << GET_HIGH_SLICE_INDEX(start));

	return ret;
}

static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
			      unsigned long len)
{
	struct vm_area_struct *vma;

	if ((mm->task_size - len) < addr)
		return 0;
	vma = find_vma(mm, addr);
	return (!vma || (addr + len) <= vma->vm_start);
}

static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
{
	return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
				   1ul << SLICE_LOW_SHIFT);
}

static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
{
	unsigned long start = slice << SLICE_HIGH_SHIFT;
	unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);

	/* Hack, so that each address is controlled by exactly one
	 * of the high or low area bitmaps, the first high area starts
	 * at 4GB, not 0 */
	if (start == 0)
		start = SLICE_LOW_TOP;

	return !slice_area_is_free(mm, start, end - start);
}

static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
{
	struct slice_mask ret = { 0, 0 };
	unsigned long i;

	for (i = 0; i < SLICE_NUM_LOW; i++)
		if (!slice_low_has_vma(mm, i))
			ret.low_slices |= 1u << i;

	if (mm->task_size <= SLICE_LOW_TOP)
		return ret;

	for (i = 0; i < SLICE_NUM_HIGH; i++)
		if (!slice_high_has_vma(mm, i))
			ret.high_slices |= 1u << i;

	return ret;
}

static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
{
	struct slice_mask ret = { 0, 0 };
	unsigned long i;
	u64 psizes;

	psizes = mm->context.low_slices_psize;
	for (i = 0; i < SLICE_NUM_LOW; i++)
		if (((psizes >> (i * 4)) & 0xf) == psize)
			ret.low_slices |= 1u << i;

	psizes = mm->context.high_slices_psize;
	for (i = 0; i < SLICE_NUM_HIGH; i++)
		if (((psizes >> (i * 4)) & 0xf) == psize)
			ret.high_slices |= 1u << i;

	return ret;
}

static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
{
	return (mask.low_slices & available.low_slices) == mask.low_slices &&
		(mask.high_slices & available.high_slices) == mask.high_slices;
}

static void slice_flush_segments(void *parm)
{
	struct mm_struct *mm = parm;
	unsigned long flags;

	if (mm != current->active_mm)
		return;

	/* update the paca copy of the context struct */
	get_paca()->context = current->active_mm->context;

	local_irq_save(flags);
	slb_flush_and_rebolt();
	local_irq_restore(flags);
}
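/*
 * Worked example of the 4-bit psize packing that slice_convert()
 * below rewrites (a sketch assuming the usual mmu_psize indices,
 * MMU_PAGE_4K == 0 and MMU_PAGE_64K == 1): each nibble i of
 * {low,high}_slices_psize holds the page size index of slice i, so
 * converting low slice 2 of an all-4K context to 64K turns
 * low_slices_psize from 0x0000000000000000 into 0x0000000000000100:
 *
 *	lpsizes = (lpsizes & ~(0xful << (2 * 4))) | (1ul << (2 * 4));
 */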
static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
{
	/* Write the new slice psize bits */
	u64 lpsizes, hpsizes;
	unsigned long i, flags;

	slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
	slice_print_mask(" mask", mask);

	/* We need to use a spinlock here to protect against
	 * concurrent 64k -> 4k demotion ...
	 */
	spin_lock_irqsave(&slice_convert_lock, flags);

	lpsizes = mm->context.low_slices_psize;
	for (i = 0; i < SLICE_NUM_LOW; i++)
		if (mask.low_slices & (1u << i))
			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
				(((unsigned long)psize) << (i * 4));

	hpsizes = mm->context.high_slices_psize;
	for (i = 0; i < SLICE_NUM_HIGH; i++)
		if (mask.high_slices & (1u << i))
			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
				(((unsigned long)psize) << (i * 4));

	mm->context.low_slices_psize = lpsizes;
	mm->context.high_slices_psize = hpsizes;

	slice_dbg(" lsps=%lx, hsps=%lx\n",
		  mm->context.low_slices_psize,
		  mm->context.high_slices_psize);

	spin_unlock_irqrestore(&slice_convert_lock, flags);
	mb();

	on_each_cpu(slice_flush_segments, mm, 0, 1);
#ifdef CONFIG_SPU_BASE
	spu_flush_all_slbs(mm);
#endif
}

static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
					      unsigned long len,
					      struct slice_mask available,
					      int psize, int use_cache)
{
	struct vm_area_struct *vma;
	unsigned long start_addr, addr;
	struct slice_mask mask;
	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);

	if (use_cache) {
		if (len <= mm->cached_hole_size) {
			start_addr = addr = TASK_UNMAPPED_BASE;
			mm->cached_hole_size = 0;
		} else
			start_addr = addr = mm->free_area_cache;
	} else
		start_addr = addr = TASK_UNMAPPED_BASE;

full_search:
	for (;;) {
		addr = _ALIGN_UP(addr, 1ul << pshift);
		if ((TASK_SIZE - len) < addr)
			break;
		vma = find_vma(mm, addr);
		BUG_ON(vma && (addr >= vma->vm_end));

		mask = slice_range_to_mask(addr, len);
		if (!slice_check_fit(mask, available)) {
			if (addr < SLICE_LOW_TOP)
				addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
			else
				addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
			continue;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			if (use_cache)
				mm->free_area_cache = addr + len;
			return addr;
		}
		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
	}

	/* Make sure we didn't miss any holes */
	if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
		goto full_search;
	}
	return -ENOMEM;
}
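/*
 * An illustrative note on the top-down skip logic below: when a
 * candidate address fails the fit check, the scan drops to the
 * previous slice boundary.  The middle case handles addresses between
 * SLICE_LOW_TOP (4GB) and the first high slice boundary (1TB with
 * SLICE_HIGH_SHIFT == 40): aligning such an address down by a whole
 * high slice would jump straight to 0 and skip the low slices, so the
 * scan resumes at SLICE_LOW_TOP instead.  For example, a failed
 * candidate at 0x8000000000 (512GB) continues with the next candidate
 * placed just below 4GB.
 */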
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
					     unsigned long len,
					     struct slice_mask available,
					     int psize, int use_cache)
{
	struct vm_area_struct *vma;
	unsigned long addr;
	struct slice_mask mask;
	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);

	/* check if free_area_cache is useful for us */
	if (use_cache) {
		if (len <= mm->cached_hole_size) {
			mm->cached_hole_size = 0;
			mm->free_area_cache = mm->mmap_base;
		}

		/* either no address requested or can't fit in requested
		 * address hole
		 */
		addr = mm->free_area_cache;

		/* make sure it can fit in the remaining address space */
		if (addr > len) {
			addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
			mask = slice_range_to_mask(addr, len);
			if (slice_check_fit(mask, available) &&
			    slice_area_is_free(mm, addr, len))
				/* remember the address as a hint for
				 * next time
				 */
				return (mm->free_area_cache = addr);
		}
	}

	addr = mm->mmap_base;
	while (addr > len) {
		/* Go down by chunk size */
		addr = _ALIGN_DOWN(addr - len, 1ul << pshift);

		/* Check for hit with different page size */
		mask = slice_range_to_mask(addr, len);
		if (!slice_check_fit(mask, available)) {
			if (addr < SLICE_LOW_TOP)
				addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
			else if (addr < (1ul << SLICE_HIGH_SHIFT))
				addr = SLICE_LOW_TOP;
			else
				addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
			continue;
		}

		/*
		 * Lookup failure means no vma is above this address,
		 * else if new region fits below vma->vm_start,
		 * return with success:
		 */
		vma = find_vma(mm, addr);
		if (!vma || (addr + len) <= vma->vm_start) {
			/* remember the address as a hint for next time */
			if (use_cache)
				mm->free_area_cache = addr;
			return addr;
		}

		/* remember the largest hole we saw so far */
		if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start;
	}

	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	addr = slice_find_area_bottomup(mm, len, available, psize, 0);

	/*
	 * Restore the topdown base:
	 */
	if (use_cache) {
		mm->free_area_cache = mm->mmap_base;
		mm->cached_hole_size = ~0UL;
	}

	return addr;
}


static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
				     struct slice_mask mask, int psize,
				     int topdown, int use_cache)
{
	if (topdown)
		return slice_find_area_topdown(mm, len, mask, psize, use_cache);
	else
		return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
}
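/*
 * slice_get_unmapped_area() below is the common entry point for the
 * arch_get_unmapped_area*() hooks further down and for hugetlbfs.  As
 * an illustration of the calling convention (a sketch, assuming a
 * hugetlbfs-style caller and that mmu_huge_psize holds the huge page
 * size index), such a caller would search top-down without using the
 * free_area_cache:
 *
 *	unsigned long hugetlb_get_unmapped_area(struct file *file,
 *			unsigned long addr, unsigned long len,
 *			unsigned long pgoff, unsigned long flags)
 *	{
 *		return slice_get_unmapped_area(addr, len, flags,
 *					       mmu_huge_psize, 1, 0);
 *	}
 */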
unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
				      unsigned long flags, unsigned int psize,
				      int topdown, int use_cache)
{
	struct slice_mask mask;
	struct slice_mask good_mask;
	struct slice_mask potential_mask = {0,0} /* silence uninitialized warning */;
	int pmask_set = 0;
	int fixed = (flags & MAP_FIXED);
	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
	struct mm_struct *mm = current->mm;

	/* Sanity checks */
	BUG_ON(mm->task_size == 0);

	slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
	slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
		  addr, len, flags, topdown, use_cache);

	if (len > mm->task_size)
		return -ENOMEM;
	if (fixed && (addr & ((1ul << pshift) - 1)))
		return -EINVAL;
	if (fixed && addr > (mm->task_size - len))
		return -EINVAL;

	/* If hint, make sure it matches our alignment restrictions */
	if (!fixed && addr) {
		addr = _ALIGN_UP(addr, 1ul << pshift);
		slice_dbg(" aligned addr=%lx\n", addr);
	}

	/* First, make up a "good" mask of slices that have the right size
	 * already
	 */
	good_mask = slice_mask_for_size(mm, psize);
	slice_print_mask(" good_mask", good_mask);

	/* First check hint if it's valid or if we have MAP_FIXED */
	if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) {

		/* Don't bother with hint if it overlaps a VMA */
		if (!fixed && !slice_area_is_free(mm, addr, len))
			goto search;

		/* Build a mask for the requested range */
		mask = slice_range_to_mask(addr, len);
		slice_print_mask(" mask", mask);

		/* Check if we fit in the good mask. If we do, we just return,
		 * nothing else to do
		 */
		if (slice_check_fit(mask, good_mask)) {
			slice_dbg(" fits good !\n");
			return addr;
		}

		/* We don't fit in the good mask, check what other slices are
		 * empty and thus can be converted
		 */
		potential_mask = slice_mask_for_free(mm);
		potential_mask.low_slices |= good_mask.low_slices;
		potential_mask.high_slices |= good_mask.high_slices;
		pmask_set = 1;
		slice_print_mask(" potential", potential_mask);
		if (slice_check_fit(mask, potential_mask)) {
			slice_dbg(" fits potential !\n");
			goto convert;
		}
	}

	/* If we have MAP_FIXED and failed the above step, then error out */
	if (fixed)
		return -EBUSY;

 search:
	slice_dbg(" search...\n");

	/* Now let's see if we can find something in the existing slices
	 * for that size
	 */
	addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache);
	if (addr != -ENOMEM) {
		/* Found within the good mask, we don't have to setup,
		 * we thus return directly
		 */
		slice_dbg(" found area at 0x%lx\n", addr);
		return addr;
	}

	/* Won't fit, check what can be converted */
	if (!pmask_set) {
		potential_mask = slice_mask_for_free(mm);
		potential_mask.low_slices |= good_mask.low_slices;
		potential_mask.high_slices |= good_mask.high_slices;
		pmask_set = 1;
		slice_print_mask(" potential", potential_mask);
	}

	/* Try again, this time over the potential mask of good plus
	 * convertible slices
	 */
	addr = slice_find_area(mm, len, potential_mask, psize, topdown,
			       use_cache);
	if (addr == -ENOMEM)
		return -ENOMEM;

	mask = slice_range_to_mask(addr, len);
	slice_dbg(" found potential area at 0x%lx\n", addr);
	slice_print_mask(" mask", mask);

 convert:
	slice_convert(mm, mask, psize);
	return addr;
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);

unsigned long arch_get_unmapped_area(struct file *filp,
				     unsigned long addr,
				     unsigned long len,
				     unsigned long pgoff,
				     unsigned long flags)
{
	return slice_get_unmapped_area(addr, len, flags,
				       current->mm->context.user_psize,
				       0, 1);
}

unsigned long arch_get_unmapped_area_topdown(struct file *filp,
					     const unsigned long addr0,
					     const unsigned long len,
					     const unsigned long pgoff,
					     const unsigned long flags)
{
	return slice_get_unmapped_area(addr0, len, flags,
				       current->mm->context.user_psize,
				       1, 1);
}

unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
	u64 psizes;
	int index;

	if (addr < SLICE_LOW_TOP) {
		psizes = mm->context.low_slices_psize;
		index = GET_LOW_SLICE_INDEX(addr);
	} else {
		psizes = mm->context.high_slices_psize;
		index = GET_HIGH_SLICE_INDEX(addr);
	}

	return (psizes >> (index * 4)) & 0xf;
}
EXPORT_SYMBOL_GPL(get_slice_psize);
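/*
 * Example of the lookup above (assuming GET_LOW_SLICE_INDEX() is a
 * plain shift by SLICE_LOW_SHIFT == 28): for addr == 0x12345678 the
 * address is below SLICE_LOW_TOP, so index = 0x12345678 >> 28 = 1 and
 * the result is (low_slices_psize >> 4) & 0xf, i.e. the nibble stamped
 * for low slice 1 by slice_convert() or slice_set_user_psize().
 */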
/*
 * This is called by hash_page when it needs to do a lazy conversion of
 * an address space from real 64K pages to combo 4K pages (typically
 * when hitting a non cacheable mapping on a processor or hypervisor
 * that won't allow them for 64K pages).
 *
 * This is also called in init_new_context() to change back the user
 * psize from whatever the parent context had it set to.
 *
 * This function will only change the content of the {low,high}_slices_psize
 * masks, it will not flush SLBs as this shall be handled lazily by the
 * caller.
 */
void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
{
	unsigned long flags, lpsizes, hpsizes;
	unsigned int old_psize;
	int i;

	slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);

	spin_lock_irqsave(&slice_convert_lock, flags);

	old_psize = mm->context.user_psize;
	slice_dbg(" old_psize=%d\n", old_psize);
	if (old_psize == psize)
		goto bail;

	mm->context.user_psize = psize;
	wmb();

	lpsizes = mm->context.low_slices_psize;
	for (i = 0; i < SLICE_NUM_LOW; i++)
		if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
			lpsizes = (lpsizes & ~(0xful << (i * 4))) |
				(((unsigned long)psize) << (i * 4));

	hpsizes = mm->context.high_slices_psize;
	for (i = 0; i < SLICE_NUM_HIGH; i++)
		if (((hpsizes >> (i * 4)) & 0xf) == old_psize)
			hpsizes = (hpsizes & ~(0xful << (i * 4))) |
				(((unsigned long)psize) << (i * 4));

	mm->context.low_slices_psize = lpsizes;
	mm->context.high_slices_psize = hpsizes;

	slice_dbg(" lsps=%lx, hsps=%lx\n",
		  mm->context.low_slices_psize,
		  mm->context.high_slices_psize);

 bail:
	spin_unlock_irqrestore(&slice_convert_lock, flags);
}

/*
 * is_hugepage_only_range() is used by generic code to verify whether
 * a normal mmap mapping (non hugetlbfs) is valid on a given area.
 *
 * Until the generic code provides a more generic hook and/or starts
 * calling arch get_unmapped_area for MAP_FIXED (which our implementation
 * here knows how to deal with), we hijack it to keep standard mappings
 * away from us.
 *
 * Because of that generic code limitation, MAP_FIXED mappings cannot
 * "convert" back a slice with no VMAs to the standard page size, only
 * get_unmapped_area() can. It would be possible to fix it here but I
 * prefer working on fixing the generic code instead.
 *
 * WARNING: This will not work if hugetlbfs isn't enabled since the
 * generic code will redefine that function as 0 in that case. This is
 * ok for now as we only use slices with hugetlbfs enabled. This should
 * be fixed as the generic code gets fixed.
 */
int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
			   unsigned long len)
{
	struct slice_mask mask, available;

	mask = slice_range_to_mask(addr, len);
	available = slice_mask_for_size(mm, mm->context.user_psize);

	return !slice_check_fit(mask, available);
}
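/*
 * An illustrative example of the check above (assuming MMU_PAGE_4K
 * names the 4K psize index): in a context whose user_psize is 4K but
 * where one low slice has been converted to the 16M huge page size,
 * slice_mask_for_size(mm, MMU_PAGE_4K) leaves that slice's bit clear,
 * so any range touching it fails slice_check_fit() and the function
 * returns non-zero. The generic code then refuses to place a normal
 * mapping there.
 */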