/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008  Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "selftest.h"
#include "shadow.h"


#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
# define KMEMCHECK_ENABLED 2
#endif

int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
#ifdef CONFIG_SMP
	/*
	 * Limit SMP to use a single CPU. We rely on the fact that this code
	 * runs before SMP is set up.
	 */
	if (setup_max_cpus > 1) {
		printk(KERN_INFO
			"kmemcheck: Limiting number of CPUs to 1.\n");
		setup_max_cpus = 1;
	}
#endif

	if (!kmemcheck_selftest()) {
		printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n");
		kmemcheck_enabled = 0;
		return -EINVAL;
	}

	printk(KERN_INFO "kmemcheck: Initialized\n");
	return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
	if (!str)
		return -EINVAL;

	sscanf(str, "%d", &kmemcheck_enabled);
	return 0;
}

early_param("kmemcheck", param_kmemcheck);
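/*
 * The boot parameter and the compile-time default above select one of three
 * run-time modes, which the rest of this file tests against:
 *
 *   kmemcheck=0	checking disabled
 *   kmemcheck=1	checking enabled
 *   kmemcheck=2	one-shot mode: kmemcheck disables itself after the
 *			first error has been reported (see the
 *			"kmemcheck_enabled == 2" checks below)
 *
 * For example, booting with "kmemcheck=1" on a kernel built with
 * CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT enables checking without a rebuild.
 */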
int kmemcheck_show_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

int kmemcheck_hide_addr(unsigned long address)
{
	pte_t *pte;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return 0;

	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
	return 1;
}

struct kmemcheck_context {
	bool busy;
	int balance;

	/*
	 * There can be at most two memory operands to an instruction, but
	 * each address can cross a page boundary -- so we may need up to
	 * four addresses that must be hidden/revealed for each fault.
	 */
	unsigned long addr[4];
	unsigned long n_addrs;
	unsigned long flags;

	/* Data size of the instruction that caused a fault. */
	unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

bool kmemcheck_active(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
	data->addr[data->n_addrs++] = addr;
}

static unsigned int kmemcheck_show_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_show_addr(data->addr[i]);

	return n;
}

static unsigned int kmemcheck_hide_all(void)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	unsigned int i;
	unsigned int n;

	n = 0;
	for (i = 0; i < data->n_addrs; ++i)
		n += kmemcheck_hide_addr(data->addr[i]);

	return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 0)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->balance = 0;
		return;
	}

	/*
	 * None of the addresses actually belonged to kmemcheck. Note that
	 * this is not an error.
	 */
	if (kmemcheck_show_all() == 0)
		return;

	++data->balance;

	/*
	 * The IF needs to be cleared as well, so that the faulting
	 * instruction can run "uninterrupted". Otherwise, we might take
	 * an interrupt and start executing that before we've had a chance
	 * to hide the page again.
	 *
	 * NOTE: In the rare case of multiple faults, we must not override
	 * the original flags:
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		data->flags = regs->flags;

	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
	int n;

	BUG_ON(!irqs_disabled());

	if (unlikely(data->balance != 1)) {
		kmemcheck_show_all();
		kmemcheck_error_save_bug(regs);
		data->n_addrs = 0;
		data->balance = 0;

		if (!(data->flags & X86_EFLAGS_TF))
			regs->flags &= ~X86_EFLAGS_TF;
		if (data->flags & X86_EFLAGS_IF)
			regs->flags |= X86_EFLAGS_IF;
		return;
	}

	if (kmemcheck_enabled)
		n = kmemcheck_hide_all();
	else
		n = kmemcheck_show_all();

	if (n == 0)
		return;

	--data->balance;

	data->n_addrs = 0;

	if (!(data->flags & X86_EFLAGS_TF))
		regs->flags &= ~X86_EFLAGS_TF;
	if (data->flags & X86_EFLAGS_IF)
		regs->flags |= X86_EFLAGS_IF;
}
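/*
 * Note on the show/hide pair above: together they implement single-stepping
 * over the faulting instruction. The #PF handler reveals the hidden page(s)
 * and sets TF so that exactly one instruction executes before the CPU raises
 * #DB, at which point the page(s) are hidden again and the saved flags are
 * restored. data->balance counts outstanding "show" operations; any value
 * other than 0 on entry to kmemcheck_show() or 1 on entry to kmemcheck_hide()
 * means a debug trap was missed, which is reported via
 * kmemcheck_error_save_bug().
 */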
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

bool kmemcheck_page_is_tracked(struct page *p)
{
	/* This will also check the "hidden" flag of the PTE. */
	return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; ++i) {
		unsigned long address;
		pte_t *pte;
		unsigned int level;

		address = (unsigned long) page_address(&p[i]);
		pte = lookup_address(address, &level);
		BUG_ON(!pte);
		BUG_ON(level != PG_LEVEL_4K);

		set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
		set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
		__flush_tlb_one(address);
	}
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;
	enum kmemcheck_shadow status;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;

	/* Don't warn about it again. */
	kmemcheck_shadow_set(shadow, size);
}

bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size)
{
	enum kmemcheck_shadow status;
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return true;

	status = kmemcheck_shadow_test_all(shadow, size);

	return status == KMEMCHECK_SHADOW_INITIALIZED;
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_read_strict(regs, addr, size);
		return;
	}

	/*
	 * What we do is basically to split the access across the
	 * two pages and handle each part separately. Yes, this means
	 * that we may now see reads that are 3 + 5 bytes, for
	 * example (and if both are uninitialized, there will be two
	 * reports), but it makes the code a lot simpler.
	 */
	kmemcheck_read_strict(regs, addr, next_page - addr);
	kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
}
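/*
 * To illustrate the split above with hypothetical numbers (4 KiB pages):
 * an 8-byte read at addr = 0x1ffd gives next_addr = 0x2004 and
 * next_page = 0x2000, so the access is checked as 3 bytes at 0x1ffd
 * (next_page - addr) followed by 5 bytes at 0x2000
 * (next_addr - next_page + 1) -- the "3 + 5 bytes" case the comment
 * mentions.
 */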
static void kmemcheck_write_strict(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	void *shadow;

	shadow = kmemcheck_shadow_lookup(addr);
	if (!shadow)
		return;

	kmemcheck_save_addr(addr);
	kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
	unsigned long addr, unsigned int size)
{
	unsigned long page = addr & PAGE_MASK;
	unsigned long next_addr = addr + size - 1;
	unsigned long next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		kmemcheck_write_strict(regs, addr, size);
		return;
	}

	/* See comment in kmemcheck_read(). */
	kmemcheck_write_strict(regs, addr, next_page - addr);
	kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
	unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
	uint8_t shadow[8];
	enum kmemcheck_shadow status;

	unsigned long page;
	unsigned long next_addr;
	unsigned long next_page;

	uint8_t *x;
	unsigned int i;
	unsigned int n;

	BUG_ON(size > sizeof(shadow));

	page = src_addr & PAGE_MASK;
	next_addr = src_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < size; ++i)
				shadow[i] = x[i];
		} else {
			for (i = 0; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	} else {
		n = next_page - src_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(src_addr);
		if (x) {
			kmemcheck_save_addr(src_addr);
			for (i = 0; i < n; ++i)
				shadow[i] = x[i];
		} else {
			/* Not tracked */
			for (i = 0; i < n; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i)
				shadow[i] = x[i - n];
		} else {
			/* Not tracked */
			for (i = n; i < size; ++i)
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
		}
	}

	page = dst_addr & PAGE_MASK;
	next_addr = dst_addr + size - 1;
	next_page = next_addr & PAGE_MASK;

	if (likely(page == next_page)) {
		/* Same page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < size; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	} else {
		n = next_page - dst_addr;
		BUG_ON(n > sizeof(shadow));

		/* First page */
		x = kmemcheck_shadow_lookup(dst_addr);
		if (x) {
			kmemcheck_save_addr(dst_addr);
			for (i = 0; i < n; ++i) {
				x[i] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}

		/* Second page */
		x = kmemcheck_shadow_lookup(next_page);
		if (x) {
			kmemcheck_save_addr(next_page);
			for (i = n; i < size; ++i) {
				x[i - n] = shadow[i];
				shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
			}
		}
	}

	status = kmemcheck_shadow_test(shadow, size);
	if (status == KMEMCHECK_SHADOW_INITIALIZED)
		return;

	if (kmemcheck_enabled)
		kmemcheck_error_save(status, src_addr, size, regs);

	if (kmemcheck_enabled == 2)
		kmemcheck_enabled = 0;
}
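/*
 * A note on the semantics of kmemcheck_copy(): bytes written to a tracked
 * destination are cleared in the local shadow buffer as they are propagated,
 * so the final kmemcheck_shadow_test() only sees bytes whose destination is
 * untracked. In other words, copying uninitialized data between tracked
 * regions silently propagates the shadow state, and a warning is generated
 * only when uninitialized data escapes into untracked memory.
 */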
enum kmemcheck_method {
	KMEMCHECK_READ,
	KMEMCHECK_WRITE,
};

static void kmemcheck_access(struct pt_regs *regs,
	unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
	const uint8_t *insn;
	const uint8_t *insn_primary;
	unsigned int size;

	struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

	/* Recursive fault -- ouch. */
	if (data->busy) {
		kmemcheck_show_addr(fallback_address);
		kmemcheck_error_save_bug(regs);
		return;
	}

	data->busy = true;

	insn = (const uint8_t *) regs->ip;
	insn_primary = kmemcheck_opcode_get_primary(insn);

	kmemcheck_opcode_decode(insn, &size);

	switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
		/* AND, OR, XOR */
		/*
		 * Unfortunately, these instructions have to be excluded from
		 * our regular checking since they access only some (and not
		 * all) bits. This clears out "bogus" bitfield-access warnings.
		 */
	case 0x80:
	case 0x81:
	case 0x82:
	case 0x83:
		switch ((insn_primary[1] >> 3) & 7) {
			/* OR */
		case 1:
			/* AND */
		case 4:
			/* XOR */
		case 6:
			kmemcheck_write(regs, fallback_address, size);
			goto out;

			/* ADD */
		case 0:
			/* ADC */
		case 2:
			/* SBB */
		case 3:
			/* SUB */
		case 5:
			/* CMP */
		case 7:
			break;
		}
		break;
#endif

		/* MOVS, MOVSB, MOVSW, MOVSD */
	case 0xa4:
	case 0xa5:
		/*
		 * These instructions are special because they take two
		 * addresses, but we only get one page fault.
		 */
		kmemcheck_copy(regs, regs->si, regs->di, size);
		goto out;

		/* CMPS, CMPSB, CMPSW, CMPSD */
	case 0xa6:
	case 0xa7:
		kmemcheck_read(regs, regs->si, size);
		kmemcheck_read(regs, regs->di, size);
		goto out;
	}

	/*
	 * If the opcode isn't special in any way, we use the data from the
	 * page fault handler to determine the address and type of memory
	 * access.
	 */
	switch (fallback_method) {
	case KMEMCHECK_READ:
		kmemcheck_read(regs, fallback_address, size);
		goto out;
	case KMEMCHECK_WRITE:
		kmemcheck_write(regs, fallback_address, size);
		goto out;
	}

out:
	data->busy = false;
}

bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
	unsigned long error_code)
{
	pte_t *pte;

	if (regs->flags & X86_VM_MASK)
		return false;
	if (regs->cs != __KERNEL_CS)
		return false;

	pte = kmemcheck_pte_lookup(address);
	if (!pte)
		return false;

	/* Bit 1 of the page fault error code is set for write accesses. */
	if (error_code & 2)
		kmemcheck_access(regs, address, KMEMCHECK_WRITE);
	else
		kmemcheck_access(regs, address, KMEMCHECK_READ);

	kmemcheck_show(regs);
	return true;
}

bool kmemcheck_trap(struct pt_regs *regs)
{
	if (!kmemcheck_active(regs))
		return false;

	/* We're done. */
	kmemcheck_hide(regs);
	return true;
}