1/* 2 * builtin-top.c 3 * 4 * Builtin top command: Display a continuously updated profile of 5 * any workload, CPU or specific PID. 6 * 7 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> 8 * 9 * Improvements and fixes by: 10 * 11 * Arjan van de Ven <arjan@linux.intel.com> 12 * Yanmin Zhang <yanmin.zhang@intel.com> 13 * Wu Fengguang <fengguang.wu@intel.com> 14 * Mike Galbraith <efault@gmx.de> 15 * Paul Mackerras <paulus@samba.org> 16 * 17 * Released under the GPL v2. (and only v2, not any later version) 18 */ 19#include "builtin.h" 20 21#include "perf.h" 22 23#include "util/color.h" 24#include "util/session.h" 25#include "util/symbol.h" 26#include "util/thread.h" 27#include "util/util.h" 28#include <linux/rbtree.h> 29#include "util/parse-options.h" 30#include "util/parse-events.h" 31#include "util/cpumap.h" 32 33#include "util/debug.h" 34 35#include <assert.h> 36#include <fcntl.h> 37 38#include <stdio.h> 39#include <termios.h> 40#include <unistd.h> 41 42#include <errno.h> 43#include <time.h> 44#include <sched.h> 45#include <pthread.h> 46 47#include <sys/syscall.h> 48#include <sys/ioctl.h> 49#include <sys/poll.h> 50#include <sys/prctl.h> 51#include <sys/wait.h> 52#include <sys/uio.h> 53#include <sys/mman.h> 54 55#include <linux/unistd.h> 56#include <linux/types.h> 57 58static int *fd[MAX_NR_CPUS][MAX_COUNTERS]; 59 60static bool system_wide = false; 61 62static int default_interval = 0; 63 64static int count_filter = 5; 65static int print_entries; 66 67static int target_pid = -1; 68static int target_tid = -1; 69static pid_t *all_tids = NULL; 70static int thread_num = 0; 71static bool inherit = false; 72static int profile_cpu = -1; 73static int nr_cpus = 0; 74static int realtime_prio = 0; 75static bool group = false; 76static unsigned int page_size; 77static unsigned int mmap_pages = 16; 78static int freq = 1000; /* 1 KHz */ 79 80static int delay_secs = 2; 81static bool zero = false; 82static bool dump_symtab = false; 83 84static bool hide_kernel_symbols = false; 85static bool hide_user_symbols = false; 86static struct winsize winsize; 87 88/* 89 * Source 90 */ 91 92struct source_line { 93 u64 eip; 94 unsigned long count[MAX_COUNTERS]; 95 char *line; 96 struct source_line *next; 97}; 98 99static const char *sym_filter = NULL; 100struct sym_entry *sym_filter_entry = NULL; 101struct sym_entry *sym_filter_entry_sched = NULL; 102static int sym_pcnt_filter = 5; 103static int sym_counter = 0; 104static int display_weighted = -1; 105static const char *cpu_list; 106 107/* 108 * Symbols 109 */ 110 111struct sym_entry_source { 112 struct source_line *source; 113 struct source_line *lines; 114 struct source_line **lines_tail; 115 pthread_mutex_t lock; 116}; 117 118struct sym_entry { 119 struct rb_node rb_node; 120 struct list_head node; 121 unsigned long snap_count; 122 double weight; 123 int skip; 124 u16 name_len; 125 u8 origin; 126 struct map *map; 127 struct sym_entry_source *src; 128 unsigned long count[0]; 129}; 130 131/* 132 * Source functions 133 */ 134 135static inline struct symbol *sym_entry__symbol(struct sym_entry *self) 136{ 137 return ((void *)self) + symbol_conf.priv_size; 138} 139 140void get_term_dimensions(struct winsize *ws) 141{ 142 char *s = getenv("LINES"); 143 144 if (s != NULL) { 145 ws->ws_row = atoi(s); 146 s = getenv("COLUMNS"); 147 if (s != NULL) { 148 ws->ws_col = atoi(s); 149 if (ws->ws_row && ws->ws_col) 150 return; 151 } 152 } 153#ifdef TIOCGWINSZ 154 if (ioctl(1, TIOCGWINSZ, ws) == 0 && 155 ws->ws_row && ws->ws_col) 156 return; 157#endif 158 ws->ws_row = 25; 159 ws->ws_col = 80; 160} 161 162static void update_print_entries(struct winsize *ws) 163{ 164 print_entries = ws->ws_row; 165 166 if (print_entries > 9) 167 print_entries -= 9; 168} 169 170static void sig_winch_handler(int sig __used) 171{ 172 get_term_dimensions(&winsize); 173 update_print_entries(&winsize); 174} 175 176static int parse_source(struct sym_entry *syme) 177{ 178 struct symbol *sym; 179 struct sym_entry_source *source; 180 struct map *map; 181 FILE *file; 182 char command[PATH_MAX*2]; 183 const char *path; 184 u64 len; 185 186 if (!syme) 187 return -1; 188 189 sym = sym_entry__symbol(syme); 190 map = syme->map; 191 192 /* 193 * We can't annotate with just /proc/kallsyms 194 */ 195 if (map->dso->origin == DSO__ORIG_KERNEL) 196 return -1; 197 198 if (syme->src == NULL) { 199 syme->src = zalloc(sizeof(*source)); 200 if (syme->src == NULL) 201 return -1; 202 pthread_mutex_init(&syme->src->lock, NULL); 203 } 204 205 source = syme->src; 206 207 if (source->lines) { 208 pthread_mutex_lock(&source->lock); 209 goto out_assign; 210 } 211 path = map->dso->long_name; 212 213 len = sym->end - sym->start; 214 215 sprintf(command, 216 "objdump --start-address=%#0*Lx --stop-address=%#0*Lx -dS %s", 217 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->start), 218 BITS_PER_LONG / 4, map__rip_2objdump(map, sym->end), path); 219 220 file = popen(command, "r"); 221 if (!file) 222 return -1; 223 224 pthread_mutex_lock(&source->lock); 225 source->lines_tail = &source->lines; 226 while (!feof(file)) { 227 struct source_line *src; 228 size_t dummy = 0; 229 char *c, *sep; 230 231 src = malloc(sizeof(struct source_line)); 232 assert(src != NULL); 233 memset(src, 0, sizeof(struct source_line)); 234 235 if (getline(&src->line, &dummy, file) < 0) 236 break; 237 if (!src->line) 238 break; 239 240 c = strchr(src->line, '\n'); 241 if (c) 242 *c = 0; 243 244 src->next = NULL; 245 *source->lines_tail = src; 246 source->lines_tail = &src->next; 247 248 src->eip = strtoull(src->line, &sep, 16); 249 if (*sep == ':') 250 src->eip = map__objdump_2ip(map, src->eip); 251 else /* this line has no ip info (e.g. source line) */ 252 src->eip = 0; 253 } 254 pclose(file); 255out_assign: 256 sym_filter_entry = syme; 257 pthread_mutex_unlock(&source->lock); 258 return 0; 259} 260 261static void __zero_source_counters(struct sym_entry *syme) 262{ 263 int i; 264 struct source_line *line; 265 266 line = syme->src->lines; 267 while (line) { 268 for (i = 0; i < nr_counters; i++) 269 line->count[i] = 0; 270 line = line->next; 271 } 272} 273 274static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) 275{ 276 struct source_line *line; 277 278 if (syme != sym_filter_entry) 279 return; 280 281 if (pthread_mutex_trylock(&syme->src->lock)) 282 return; 283 284 if (syme->src == NULL || syme->src->source == NULL) 285 goto out_unlock; 286 287 for (line = syme->src->lines; line; line = line->next) { 288 /* skip lines without IP info */ 289 if (line->eip == 0) 290 continue; 291 if (line->eip == ip) { 292 line->count[counter]++; 293 break; 294 } 295 if (line->eip > ip) 296 break; 297 } 298out_unlock: 299 pthread_mutex_unlock(&syme->src->lock); 300} 301 302#define PATTERN_LEN (BITS_PER_LONG / 4 + 2) 303 304static void lookup_sym_source(struct sym_entry *syme) 305{ 306 struct symbol *symbol = sym_entry__symbol(syme); 307 struct source_line *line; 308 char pattern[PATTERN_LEN + 1]; 309 310 sprintf(pattern, "%0*Lx <", BITS_PER_LONG / 4, 311 map__rip_2objdump(syme->map, symbol->start)); 312 313 pthread_mutex_lock(&syme->src->lock); 314 for (line = syme->src->lines; line; line = line->next) { 315 if (memcmp(line->line, pattern, PATTERN_LEN) == 0) { 316 syme->src->source = line; 317 break; 318 } 319 } 320 pthread_mutex_unlock(&syme->src->lock); 321} 322 323static void show_lines(struct source_line *queue, int count, int total) 324{ 325 int i; 326 struct source_line *line; 327 328 line = queue; 329 for (i = 0; i < count; i++) { 330 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; 331 332 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); 333 line = line->next; 334 } 335} 336 337#define TRACE_COUNT 3 338 339static void show_details(struct sym_entry *syme) 340{ 341 struct symbol *symbol; 342 struct source_line *line; 343 struct source_line *line_queue = NULL; 344 int displayed = 0; 345 int line_queue_count = 0, total = 0, more = 0; 346 347 if (!syme) 348 return; 349 350 if (!syme->src->source) 351 lookup_sym_source(syme); 352 353 if (!syme->src->source) 354 return; 355 356 symbol = sym_entry__symbol(syme); 357 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); 358 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); 359 360 pthread_mutex_lock(&syme->src->lock); 361 line = syme->src->source; 362 while (line) { 363 total += line->count[sym_counter]; 364 line = line->next; 365 } 366 367 line = syme->src->source; 368 while (line) { 369 float pcnt = 0.0; 370 371 if (!line_queue_count) 372 line_queue = line; 373 line_queue_count++; 374 375 if (line->count[sym_counter]) 376 pcnt = 100.0 * line->count[sym_counter] / (float)total; 377 if (pcnt >= (float)sym_pcnt_filter) { 378 if (displayed <= print_entries) 379 show_lines(line_queue, line_queue_count, total); 380 else more++; 381 displayed += line_queue_count; 382 line_queue_count = 0; 383 line_queue = NULL; 384 } else if (line_queue_count > TRACE_COUNT) { 385 line_queue = line_queue->next; 386 line_queue_count--; 387 } 388 389 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; 390 line = line->next; 391 } 392 pthread_mutex_unlock(&syme->src->lock); 393 if (more) 394 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 395} 396 397/* 398 * Symbols will be added here in event__process_sample and will get out 399 * after decayed. 400 */ 401static LIST_HEAD(active_symbols); 402static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; 403 404/* 405 * Ordering weight: count-1 * count-2 * ... / count-n 406 */ 407static double sym_weight(const struct sym_entry *sym) 408{ 409 double weight = sym->snap_count; 410 int counter; 411 412 if (!display_weighted) 413 return weight; 414 415 for (counter = 1; counter < nr_counters-1; counter++) 416 weight *= sym->count[counter]; 417 418 weight /= (sym->count[counter] + 1); 419 420 return weight; 421} 422 423static long samples; 424static long kernel_samples, us_samples; 425static long exact_samples; 426static long guest_us_samples, guest_kernel_samples; 427static const char CONSOLE_CLEAR[] = "[H[2J"; 428 429static void __list_insert_active_sym(struct sym_entry *syme) 430{ 431 list_add(&syme->node, &active_symbols); 432} 433 434static void list_remove_active_sym(struct sym_entry *syme) 435{ 436 pthread_mutex_lock(&active_symbols_lock); 437 list_del_init(&syme->node); 438 pthread_mutex_unlock(&active_symbols_lock); 439} 440 441static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) 442{ 443 struct rb_node **p = &tree->rb_node; 444 struct rb_node *parent = NULL; 445 struct sym_entry *iter; 446 447 while (*p != NULL) { 448 parent = *p; 449 iter = rb_entry(parent, struct sym_entry, rb_node); 450 451 if (se->weight > iter->weight) 452 p = &(*p)->rb_left; 453 else 454 p = &(*p)->rb_right; 455 } 456 457 rb_link_node(&se->rb_node, parent, p); 458 rb_insert_color(&se->rb_node, tree); 459} 460 461static void print_sym_table(void) 462{ 463 int printed = 0, j; 464 int counter, snap = !display_weighted ? sym_counter : 0; 465 float samples_per_sec = samples/delay_secs; 466 float ksamples_per_sec = kernel_samples/delay_secs; 467 float us_samples_per_sec = (us_samples)/delay_secs; 468 float guest_kernel_samples_per_sec = (guest_kernel_samples)/delay_secs; 469 float guest_us_samples_per_sec = (guest_us_samples)/delay_secs; 470 float esamples_percent = (100.0*exact_samples)/samples; 471 float sum_ksamples = 0.0; 472 struct sym_entry *syme, *n; 473 struct rb_root tmp = RB_ROOT; 474 struct rb_node *nd; 475 int sym_width = 0, dso_width = 0, dso_short_width = 0; 476 const int win_width = winsize.ws_col - 1; 477 478 samples = us_samples = kernel_samples = exact_samples = 0; 479 guest_kernel_samples = guest_us_samples = 0; 480 481 /* Sort the active symbols */ 482 pthread_mutex_lock(&active_symbols_lock); 483 syme = list_entry(active_symbols.next, struct sym_entry, node); 484 pthread_mutex_unlock(&active_symbols_lock); 485 486 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 487 syme->snap_count = syme->count[snap]; 488 if (syme->snap_count != 0) { 489 490 if ((hide_user_symbols && 491 syme->origin == PERF_RECORD_MISC_USER) || 492 (hide_kernel_symbols && 493 syme->origin == PERF_RECORD_MISC_KERNEL)) { 494 list_remove_active_sym(syme); 495 continue; 496 } 497 syme->weight = sym_weight(syme); 498 rb_insert_active_sym(&tmp, syme); 499 sum_ksamples += syme->snap_count; 500 501 for (j = 0; j < nr_counters; j++) 502 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; 503 } else 504 list_remove_active_sym(syme); 505 } 506 507 puts(CONSOLE_CLEAR); 508 509 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 510 if (!perf_guest) { 511 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%%" 512 " exact: %4.1f%% [", 513 samples_per_sec, 514 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) / 515 samples_per_sec)), 516 esamples_percent); 517 } else { 518 printf(" PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%" 519 " guest kernel:%4.1f%% guest us:%4.1f%%" 520 " exact: %4.1f%% [", 521 samples_per_sec, 522 100.0 - (100.0 * ((samples_per_sec-ksamples_per_sec) / 523 samples_per_sec)), 524 100.0 - (100.0 * ((samples_per_sec-us_samples_per_sec) / 525 samples_per_sec)), 526 100.0 - (100.0 * ((samples_per_sec - 527 guest_kernel_samples_per_sec) / 528 samples_per_sec)), 529 100.0 - (100.0 * ((samples_per_sec - 530 guest_us_samples_per_sec) / 531 samples_per_sec)), 532 esamples_percent); 533 } 534 535 if (nr_counters == 1 || !display_weighted) { 536 printf("%Ld", (u64)attrs[0].sample_period); 537 if (freq) 538 printf("Hz "); 539 else 540 printf(" "); 541 } 542 543 if (!display_weighted) 544 printf("%s", event_name(sym_counter)); 545 else for (counter = 0; counter < nr_counters; counter++) { 546 if (counter) 547 printf("/"); 548 549 printf("%s", event_name(counter)); 550 } 551 552 printf( "], "); 553 554 if (target_pid != -1) 555 printf(" (target_pid: %d", target_pid); 556 else if (target_tid != -1) 557 printf(" (target_tid: %d", target_tid); 558 else 559 printf(" (all"); 560 561 if (profile_cpu != -1) 562 printf(", cpu: %d)\n", profile_cpu); 563 else { 564 if (target_tid != -1) 565 printf(")\n"); 566 else 567 printf(", %d CPUs)\n", nr_cpus); 568 } 569 570 printf("%-*.*s\n", win_width, win_width, graph_dotted_line); 571 572 if (sym_filter_entry) { 573 show_details(sym_filter_entry); 574 return; 575 } 576 577 /* 578 * Find the longest symbol name that will be displayed 579 */ 580 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 581 syme = rb_entry(nd, struct sym_entry, rb_node); 582 if (++printed > print_entries || 583 (int)syme->snap_count < count_filter) 584 continue; 585 586 if (syme->map->dso->long_name_len > dso_width) 587 dso_width = syme->map->dso->long_name_len; 588 589 if (syme->map->dso->short_name_len > dso_short_width) 590 dso_short_width = syme->map->dso->short_name_len; 591 592 if (syme->name_len > sym_width) 593 sym_width = syme->name_len; 594 } 595 596 printed = 0; 597 598 if (sym_width + dso_width > winsize.ws_col - 29) { 599 dso_width = dso_short_width; 600 if (sym_width + dso_width > winsize.ws_col - 29) 601 sym_width = winsize.ws_col - dso_width - 29; 602 } 603 putchar('\n'); 604 if (nr_counters == 1) 605 printf(" samples pcnt"); 606 else 607 printf(" weight samples pcnt"); 608 609 if (verbose) 610 printf(" RIP "); 611 printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); 612 printf(" %s _______ _____", 613 nr_counters == 1 ? " " : "______"); 614 if (verbose) 615 printf(" ________________"); 616 printf(" %-*.*s", sym_width, sym_width, graph_line); 617 printf(" %-*.*s", dso_width, dso_width, graph_line); 618 puts("\n"); 619 620 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { 621 struct symbol *sym; 622 double pcnt; 623 624 syme = rb_entry(nd, struct sym_entry, rb_node); 625 sym = sym_entry__symbol(syme); 626 if (++printed > print_entries || (int)syme->snap_count < count_filter) 627 continue; 628 629 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 630 sum_ksamples)); 631 632 if (nr_counters == 1 || !display_weighted) 633 printf("%20.2f ", syme->weight); 634 else 635 printf("%9.1f %10ld ", syme->weight, syme->snap_count); 636 637 percent_color_fprintf(stdout, "%4.1f%%", pcnt); 638 if (verbose) 639 printf(" %016llx", sym->start); 640 printf(" %-*.*s", sym_width, sym_width, sym->name); 641 printf(" %-*.*s\n", dso_width, dso_width, 642 dso_width >= syme->map->dso->long_name_len ? 643 syme->map->dso->long_name : 644 syme->map->dso->short_name); 645 } 646} 647 648static void prompt_integer(int *target, const char *msg) 649{ 650 char *buf = malloc(0), *p; 651 size_t dummy = 0; 652 int tmp; 653 654 fprintf(stdout, "\n%s: ", msg); 655 if (getline(&buf, &dummy, stdin) < 0) 656 return; 657 658 p = strchr(buf, '\n'); 659 if (p) 660 *p = 0; 661 662 p = buf; 663 while(*p) { 664 if (!isdigit(*p)) 665 goto out_free; 666 p++; 667 } 668 tmp = strtoul(buf, NULL, 10); 669 *target = tmp; 670out_free: 671 free(buf); 672} 673 674static void prompt_percent(int *target, const char *msg) 675{ 676 int tmp = 0; 677 678 prompt_integer(&tmp, msg); 679 if (tmp >= 0 && tmp <= 100) 680 *target = tmp; 681} 682 683static void prompt_symbol(struct sym_entry **target, const char *msg) 684{ 685 char *buf = malloc(0), *p; 686 struct sym_entry *syme = *target, *n, *found = NULL; 687 size_t dummy = 0; 688 689 /* zero counters of active symbol */ 690 if (syme) { 691 pthread_mutex_lock(&syme->src->lock); 692 __zero_source_counters(syme); 693 *target = NULL; 694 pthread_mutex_unlock(&syme->src->lock); 695 } 696 697 fprintf(stdout, "\n%s: ", msg); 698 if (getline(&buf, &dummy, stdin) < 0) 699 goto out_free; 700 701 p = strchr(buf, '\n'); 702 if (p) 703 *p = 0; 704 705 pthread_mutex_lock(&active_symbols_lock); 706 syme = list_entry(active_symbols.next, struct sym_entry, node); 707 pthread_mutex_unlock(&active_symbols_lock); 708 709 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 710 struct symbol *sym = sym_entry__symbol(syme); 711 712 if (!strcmp(buf, sym->name)) { 713 found = syme; 714 break; 715 } 716 } 717 718 if (!found) { 719 fprintf(stderr, "Sorry, %s is not active.\n", buf); 720 sleep(1); 721 return; 722 } else 723 parse_source(found); 724 725out_free: 726 free(buf); 727} 728 729static void print_mapped_keys(void) 730{ 731 char *name = NULL; 732 733 if (sym_filter_entry) { 734 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 735 name = sym->name; 736 } 737 738 fprintf(stdout, "\nMapped keys:\n"); 739 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); 740 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); 741 742 if (nr_counters > 1) 743 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); 744 745 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); 746 747 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); 748 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); 749 fprintf(stdout, "\t[S] stop annotation.\n"); 750 751 if (nr_counters > 1) 752 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); 753 754 fprintf(stdout, 755 "\t[K] hide kernel_symbols symbols. \t(%s)\n", 756 hide_kernel_symbols ? "yes" : "no"); 757 fprintf(stdout, 758 "\t[U] hide user symbols. \t(%s)\n", 759 hide_user_symbols ? "yes" : "no"); 760 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); 761 fprintf(stdout, "\t[qQ] quit.\n"); 762} 763 764static int key_mapped(int c) 765{ 766 switch (c) { 767 case 'd': 768 case 'e': 769 case 'f': 770 case 'z': 771 case 'q': 772 case 'Q': 773 case 'K': 774 case 'U': 775 case 'F': 776 case 's': 777 case 'S': 778 return 1; 779 case 'E': 780 case 'w': 781 return nr_counters > 1 ? 1 : 0; 782 default: 783 break; 784 } 785 786 return 0; 787} 788 789static void handle_keypress(struct perf_session *session, int c) 790{ 791 if (!key_mapped(c)) { 792 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 793 struct termios tc, save; 794 795 print_mapped_keys(); 796 fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); 797 fflush(stdout); 798 799 tcgetattr(0, &save); 800 tc = save; 801 tc.c_lflag &= ~(ICANON | ECHO); 802 tc.c_cc[VMIN] = 0; 803 tc.c_cc[VTIME] = 0; 804 tcsetattr(0, TCSANOW, &tc); 805 806 poll(&stdin_poll, 1, -1); 807 c = getc(stdin); 808 809 tcsetattr(0, TCSAFLUSH, &save); 810 if (!key_mapped(c)) 811 return; 812 } 813 814 switch (c) { 815 case 'd': 816 prompt_integer(&delay_secs, "Enter display delay"); 817 if (delay_secs < 1) 818 delay_secs = 1; 819 break; 820 case 'e': 821 prompt_integer(&print_entries, "Enter display entries (lines)"); 822 if (print_entries == 0) { 823 sig_winch_handler(SIGWINCH); 824 signal(SIGWINCH, sig_winch_handler); 825 } else 826 signal(SIGWINCH, SIG_DFL); 827 break; 828 case 'E': 829 if (nr_counters > 1) { 830 int i; 831 832 fprintf(stderr, "\nAvailable events:"); 833 for (i = 0; i < nr_counters; i++) 834 fprintf(stderr, "\n\t%d %s", i, event_name(i)); 835 836 prompt_integer(&sym_counter, "Enter details event counter"); 837 838 if (sym_counter >= nr_counters) { 839 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); 840 sym_counter = 0; 841 sleep(1); 842 } 843 } else sym_counter = 0; 844 break; 845 case 'f': 846 prompt_integer(&count_filter, "Enter display event count filter"); 847 break; 848 case 'F': 849 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); 850 break; 851 case 'K': 852 hide_kernel_symbols = !hide_kernel_symbols; 853 break; 854 case 'q': 855 case 'Q': 856 printf("exiting.\n"); 857 if (dump_symtab) 858 perf_session__fprintf_dsos(session, stderr); 859 exit(0); 860 case 's': 861 prompt_symbol(&sym_filter_entry, "Enter details symbol"); 862 break; 863 case 'S': 864 if (!sym_filter_entry) 865 break; 866 else { 867 struct sym_entry *syme = sym_filter_entry; 868 869 pthread_mutex_lock(&syme->src->lock); 870 sym_filter_entry = NULL; 871 __zero_source_counters(syme); 872 pthread_mutex_unlock(&syme->src->lock); 873 } 874 break; 875 case 'U': 876 hide_user_symbols = !hide_user_symbols; 877 break; 878 case 'w': 879 display_weighted = ~display_weighted; 880 break; 881 case 'z': 882 zero = !zero; 883 break; 884 default: 885 break; 886 } 887} 888 889static void *display_thread(void *arg __used) 890{ 891 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 892 struct termios tc, save; 893 int delay_msecs, c; 894 struct perf_session *session = (struct perf_session *) arg; 895 896 tcgetattr(0, &save); 897 tc = save; 898 tc.c_lflag &= ~(ICANON | ECHO); 899 tc.c_cc[VMIN] = 0; 900 tc.c_cc[VTIME] = 0; 901 902repeat: 903 delay_msecs = delay_secs * 1000; 904 tcsetattr(0, TCSANOW, &tc); 905 /* trash return*/ 906 getc(stdin); 907 908 do { 909 print_sym_table(); 910 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 911 912 c = getc(stdin); 913 tcsetattr(0, TCSAFLUSH, &save); 914 915 handle_keypress(session, c); 916 goto repeat; 917 918 return NULL; 919} 920 921/* Tag samples to be skipped. */ 922static const char *skip_symbols[] = { 923 "default_idle", 924 "cpu_idle", 925 "enter_idle", 926 "exit_idle", 927 "mwait_idle", 928 "mwait_idle_with_hints", 929 "poll_idle", 930 "ppc64_runlatch_off", 931 "pseries_dedicated_idle_sleep", 932 NULL 933}; 934 935static int symbol_filter(struct map *map, struct symbol *sym) 936{ 937 struct sym_entry *syme; 938 const char *name = sym->name; 939 int i; 940 941 /* 942 * ppc64 uses function descriptors and appends a '.' to the 943 * start of every instruction address. Remove it. 944 */ 945 if (name[0] == '.') 946 name++; 947 948 if (!strcmp(name, "_text") || 949 !strcmp(name, "_etext") || 950 !strcmp(name, "_sinittext") || 951 !strncmp("init_module", name, 11) || 952 !strncmp("cleanup_module", name, 14) || 953 strstr(name, "_text_start") || 954 strstr(name, "_text_end")) 955 return 1; 956 957 syme = symbol__priv(sym); 958 syme->map = map; 959 syme->src = NULL; 960 961 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) { 962 /* schedule initial sym_filter_entry setup */ 963 sym_filter_entry_sched = syme; 964 sym_filter = NULL; 965 } 966 967 for (i = 0; skip_symbols[i]; i++) { 968 if (!strcmp(skip_symbols[i], name)) { 969 syme->skip = 1; 970 break; 971 } 972 } 973 974 if (!syme->skip) 975 syme->name_len = strlen(sym->name); 976 977 return 0; 978} 979 980static void event__process_sample(const event_t *self, 981 struct perf_session *session, int counter) 982{ 983 u64 ip = self->ip.ip; 984 struct sym_entry *syme; 985 struct addr_location al; 986 struct sample_data data; 987 struct machine *machine; 988 u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; 989 990 ++samples; 991 992 switch (origin) { 993 case PERF_RECORD_MISC_USER: 994 ++us_samples; 995 if (hide_user_symbols) 996 return; 997 machine = perf_session__find_host_machine(session); 998 break; 999 case PERF_RECORD_MISC_KERNEL: 1000 ++kernel_samples; 1001 if (hide_kernel_symbols) 1002 return; 1003 machine = perf_session__find_host_machine(session); 1004 break; 1005 case PERF_RECORD_MISC_GUEST_KERNEL: 1006 ++guest_kernel_samples; 1007 machine = perf_session__find_machine(session, self->ip.pid); 1008 break; 1009 case PERF_RECORD_MISC_GUEST_USER: 1010 ++guest_us_samples; 1011 /* 1012 * TODO: we don't process guest user from host side 1013 * except simple counting. 1014 */ 1015 return; 1016 default: 1017 return; 1018 } 1019 1020 if (!machine && perf_guest) { 1021 pr_err("Can't find guest [%d]'s kernel information\n", 1022 self->ip.pid); 1023 return; 1024 } 1025 1026 if (self->header.misc & PERF_RECORD_MISC_EXACT_IP) 1027 exact_samples++; 1028 1029 if (event__preprocess_sample(self, session, &al, &data, 1030 symbol_filter) < 0 || 1031 al.filtered) 1032 return; 1033 1034 if (al.sym == NULL) { 1035 /* 1036 * As we do lazy loading of symtabs we only will know if the 1037 * specified vmlinux file is invalid when we actually have a 1038 * hit in kernel space and then try to load it. So if we get 1039 * here and there are _no_ symbols in the DSO backing the 1040 * kernel map, bail out. 1041 * 1042 * We may never get here, for instance, if we use -K/ 1043 * --hide-kernel-symbols, even if the user specifies an 1044 * invalid --vmlinux ;-) 1045 */ 1046 if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && 1047 RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { 1048 pr_err("The %s file can't be used\n", 1049 symbol_conf.vmlinux_name); 1050 exit(1); 1051 } 1052 1053 return; 1054 } 1055 1056 /* let's see, whether we need to install initial sym_filter_entry */ 1057 if (sym_filter_entry_sched) { 1058 sym_filter_entry = sym_filter_entry_sched; 1059 sym_filter_entry_sched = NULL; 1060 if (parse_source(sym_filter_entry) < 0) { 1061 struct symbol *sym = sym_entry__symbol(sym_filter_entry); 1062 1063 pr_err("Can't annotate %s", sym->name); 1064 if (sym_filter_entry->map->dso->origin == DSO__ORIG_KERNEL) { 1065 pr_err(": No vmlinux file was found in the path:\n"); 1066 machine__fprintf_vmlinux_path(machine, stderr); 1067 } else 1068 pr_err(".\n"); 1069 exit(1); 1070 } 1071 } 1072 1073 syme = symbol__priv(al.sym); 1074 if (!syme->skip) { 1075 syme->count[counter]++; 1076 syme->origin = origin; 1077 record_precise_ip(syme, counter, ip); 1078 pthread_mutex_lock(&active_symbols_lock); 1079 if (list_empty(&syme->node) || !syme->node.next) 1080 __list_insert_active_sym(syme); 1081 pthread_mutex_unlock(&active_symbols_lock); 1082 } 1083} 1084 1085struct mmap_data { 1086 int counter; 1087 void *base; 1088 int mask; 1089 unsigned int prev; 1090}; 1091 1092static unsigned int mmap_read_head(struct mmap_data *md) 1093{ 1094 struct perf_event_mmap_page *pc = md->base; 1095 int head; 1096 1097 head = pc->data_head; 1098 rmb(); 1099 1100 return head; 1101} 1102 1103static void perf_session__mmap_read_counter(struct perf_session *self, 1104 struct mmap_data *md) 1105{ 1106 unsigned int head = mmap_read_head(md); 1107 unsigned int old = md->prev; 1108 unsigned char *data = md->base + page_size; 1109 int diff; 1110 1111 /* 1112 * If we're further behind than half the buffer, there's a chance 1113 * the writer will bite our tail and mess up the samples under us. 1114 * 1115 * If we somehow ended up ahead of the head, we got messed up. 1116 * 1117 * In either case, truncate and restart at head. 1118 */ 1119 diff = head - old; 1120 if (diff > md->mask / 2 || diff < 0) { 1121 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 1122 1123 /* 1124 * head points to a known good entry, start there. 1125 */ 1126 old = head; 1127 } 1128 1129 for (; old != head;) { 1130 event_t *event = (event_t *)&data[old & md->mask]; 1131 1132 event_t event_copy; 1133 1134 size_t size = event->header.size; 1135 1136 /* 1137 * Event straddles the mmap boundary -- header should always 1138 * be inside due to u64 alignment of output. 1139 */ 1140 if ((old & md->mask) + size != ((old + size) & md->mask)) { 1141 unsigned int offset = old; 1142 unsigned int len = min(sizeof(*event), size), cpy; 1143 void *dst = &event_copy; 1144 1145 do { 1146 cpy = min(md->mask + 1 - (offset & md->mask), len); 1147 memcpy(dst, &data[offset & md->mask], cpy); 1148 offset += cpy; 1149 dst += cpy; 1150 len -= cpy; 1151 } while (len); 1152 1153 event = &event_copy; 1154 } 1155 1156 if (event->header.type == PERF_RECORD_SAMPLE) 1157 event__process_sample(event, self, md->counter); 1158 else 1159 event__process(event, self); 1160 old += size; 1161 } 1162 1163 md->prev = old; 1164} 1165 1166static struct pollfd *event_array; 1167static struct mmap_data *mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; 1168 1169static void perf_session__mmap_read(struct perf_session *self) 1170{ 1171 int i, counter, thread_index; 1172 1173 for (i = 0; i < nr_cpus; i++) { 1174 for (counter = 0; counter < nr_counters; counter++) 1175 for (thread_index = 0; 1176 thread_index < thread_num; 1177 thread_index++) { 1178 perf_session__mmap_read_counter(self, 1179 &mmap_array[i][counter][thread_index]); 1180 } 1181 } 1182} 1183 1184int nr_poll; 1185int group_fd; 1186 1187static void start_counter(int i, int counter) 1188{ 1189 struct perf_event_attr *attr; 1190 int cpu; 1191 int thread_index; 1192 1193 cpu = profile_cpu; 1194 if (target_tid == -1 && profile_cpu == -1) 1195 cpu = cpumap[i]; 1196 1197 attr = attrs + counter; 1198 1199 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 1200 1201 if (freq) { 1202 attr->sample_type |= PERF_SAMPLE_PERIOD; 1203 attr->freq = 1; 1204 attr->sample_freq = freq; 1205 } 1206 1207 attr->inherit = (cpu < 0) && inherit; 1208 attr->mmap = 1; 1209 1210 for (thread_index = 0; thread_index < thread_num; thread_index++) { 1211try_again: 1212 fd[i][counter][thread_index] = sys_perf_event_open(attr, 1213 all_tids[thread_index], cpu, group_fd, 0); 1214 1215 if (fd[i][counter][thread_index] < 0) { 1216 int err = errno; 1217 1218 if (err == EPERM || err == EACCES) 1219 die("No permission - are you root?\n"); 1220 /* 1221 * If it's cycles then fall back to hrtimer 1222 * based cpu-clock-tick sw counter, which 1223 * is always available even if no PMU support: 1224 */ 1225 if (attr->type == PERF_TYPE_HARDWARE 1226 && attr->config == PERF_COUNT_HW_CPU_CYCLES) { 1227 1228 if (verbose) 1229 warning(" ... trying to fall back to cpu-clock-ticks\n"); 1230 1231 attr->type = PERF_TYPE_SOFTWARE; 1232 attr->config = PERF_COUNT_SW_CPU_CLOCK; 1233 goto try_again; 1234 } 1235 printf("\n"); 1236 error("perfcounter syscall returned with %d (%s)\n", 1237 fd[i][counter][thread_index], strerror(err)); 1238 die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); 1239 exit(-1); 1240 } 1241 assert(fd[i][counter][thread_index] >= 0); 1242 fcntl(fd[i][counter][thread_index], F_SETFL, O_NONBLOCK); 1243 1244 /* 1245 * First counter acts as the group leader: 1246 */ 1247 if (group && group_fd == -1) 1248 group_fd = fd[i][counter][thread_index]; 1249 1250 event_array[nr_poll].fd = fd[i][counter][thread_index]; 1251 event_array[nr_poll].events = POLLIN; 1252 nr_poll++; 1253 1254 mmap_array[i][counter][thread_index].counter = counter; 1255 mmap_array[i][counter][thread_index].prev = 0; 1256 mmap_array[i][counter][thread_index].mask = mmap_pages*page_size - 1; 1257 mmap_array[i][counter][thread_index].base = mmap(NULL, (mmap_pages+1)*page_size, 1258 PROT_READ, MAP_SHARED, fd[i][counter][thread_index], 0); 1259 if (mmap_array[i][counter][thread_index].base == MAP_FAILED) 1260 die("failed to mmap with %d (%s)\n", errno, strerror(errno)); 1261 } 1262} 1263 1264static int __cmd_top(void) 1265{ 1266 pthread_t thread; 1267 int i, counter; 1268 int ret; 1269 struct perf_session *session = perf_session__new(NULL, O_WRONLY, false, false); 1270 if (session == NULL) 1271 return -ENOMEM; 1272 1273 if (target_tid != -1) 1274 event__synthesize_thread(target_tid, event__process, session); 1275 else 1276 event__synthesize_threads(event__process, session); 1277 1278 for (i = 0; i < nr_cpus; i++) { 1279 group_fd = -1; 1280 for (counter = 0; counter < nr_counters; counter++) 1281 start_counter(i, counter); 1282 } 1283 1284 /* Wait for a minimal set of events before starting the snapshot */ 1285 poll(&event_array[0], nr_poll, 100); 1286 1287 perf_session__mmap_read(session); 1288 1289 if (pthread_create(&thread, NULL, display_thread, session)) { 1290 printf("Could not create display thread.\n"); 1291 exit(-1); 1292 } 1293 1294 if (realtime_prio) { 1295 struct sched_param param; 1296 1297 param.sched_priority = realtime_prio; 1298 if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { 1299 printf("Could not set realtime priority.\n"); 1300 exit(-1); 1301 } 1302 } 1303 1304 while (1) { 1305 int hits = samples; 1306 1307 perf_session__mmap_read(session); 1308 1309 if (hits == samples) 1310 ret = poll(event_array, nr_poll, 100); 1311 } 1312 1313 return 0; 1314} 1315 1316static const char * const top_usage[] = { 1317 "perf top [<options>]", 1318 NULL 1319}; 1320 1321static const struct option options[] = { 1322 OPT_CALLBACK('e', "event", NULL, "event", 1323 "event selector. use 'perf list' to list available events", 1324 parse_events), 1325 OPT_INTEGER('c', "count", &default_interval, 1326 "event period to sample"), 1327 OPT_INTEGER('p', "pid", &target_pid, 1328 "profile events on existing process id"), 1329 OPT_INTEGER('t', "tid", &target_tid, 1330 "profile events on existing thread id"), 1331 OPT_BOOLEAN('a', "all-cpus", &system_wide, 1332 "system-wide collection from all CPUs"), 1333 OPT_STRING('C', "cpu", &cpu_list, "cpu", 1334 "list of cpus to monitor"), 1335 OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, 1336 "file", "vmlinux pathname"), 1337 OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, 1338 "hide kernel symbols"), 1339 OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), 1340 OPT_INTEGER('r', "realtime", &realtime_prio, 1341 "collect data with this RT SCHED_FIFO priority"), 1342 OPT_INTEGER('d', "delay", &delay_secs, 1343 "number of seconds to delay between refreshes"), 1344 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, 1345 "dump the symbol table used for profiling"), 1346 OPT_INTEGER('f', "count-filter", &count_filter, 1347 "only display functions with more events than this"), 1348 OPT_BOOLEAN('g', "group", &group, 1349 "put the counters into a counter group"), 1350 OPT_BOOLEAN('i', "inherit", &inherit, 1351 "child tasks inherit counters"), 1352 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", 1353 "symbol to annotate"), 1354 OPT_BOOLEAN('z', "zero", &zero, 1355 "zero history across updates"), 1356 OPT_INTEGER('F', "freq", &freq, 1357 "profile at this frequency"), 1358 OPT_INTEGER('E', "entries", &print_entries, 1359 "display this many functions"), 1360 OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, 1361 "hide user symbols"), 1362 OPT_INCR('v', "verbose", &verbose, 1363 "be more verbose (show counter open errors, etc)"), 1364 OPT_END() 1365}; 1366 1367int cmd_top(int argc, const char **argv, const char *prefix __used) 1368{ 1369 int counter; 1370 int i,j; 1371 1372 page_size = sysconf(_SC_PAGE_SIZE); 1373 1374 argc = parse_options(argc, argv, options, top_usage, 0); 1375 if (argc) 1376 usage_with_options(top_usage, options); 1377 1378 if (target_pid != -1) { 1379 target_tid = target_pid; 1380 thread_num = find_all_tid(target_pid, &all_tids); 1381 if (thread_num <= 0) { 1382 fprintf(stderr, "Can't find all threads of pid %d\n", 1383 target_pid); 1384 usage_with_options(top_usage, options); 1385 } 1386 } else { 1387 all_tids=malloc(sizeof(pid_t)); 1388 if (!all_tids) 1389 return -ENOMEM; 1390 1391 all_tids[0] = target_tid; 1392 thread_num = 1; 1393 } 1394 1395 for (i = 0; i < MAX_NR_CPUS; i++) { 1396 for (j = 0; j < MAX_COUNTERS; j++) { 1397 fd[i][j] = malloc(sizeof(int)*thread_num); 1398 mmap_array[i][j] = zalloc( 1399 sizeof(struct mmap_data)*thread_num); 1400 if (!fd[i][j] || !mmap_array[i][j]) 1401 return -ENOMEM; 1402 } 1403 } 1404 event_array = malloc( 1405 sizeof(struct pollfd)*MAX_NR_CPUS*MAX_COUNTERS*thread_num); 1406 if (!event_array) 1407 return -ENOMEM; 1408 1409 /* CPU and PID are mutually exclusive */ 1410 if (target_tid > 0 && cpu_list) { 1411 printf("WARNING: PID switch overriding CPU\n"); 1412 sleep(1); 1413 cpu_list = NULL; 1414 } 1415 1416 if (!nr_counters) 1417 nr_counters = 1; 1418 1419 symbol_conf.priv_size = (sizeof(struct sym_entry) + 1420 (nr_counters + 1) * sizeof(unsigned long)); 1421 1422 symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); 1423 if (symbol__init() < 0) 1424 return -1; 1425 1426 if (delay_secs < 1) 1427 delay_secs = 1; 1428 1429 /* 1430 * User specified count overrides default frequency. 1431 */ 1432 if (default_interval) 1433 freq = 0; 1434 else if (freq) { 1435 default_interval = freq; 1436 } else { 1437 fprintf(stderr, "frequency and count are zero, aborting\n"); 1438 exit(EXIT_FAILURE); 1439 } 1440 1441 /* 1442 * Fill in the ones not specifically initialized via -c: 1443 */ 1444 for (counter = 0; counter < nr_counters; counter++) { 1445 if (attrs[counter].sample_period) 1446 continue; 1447 1448 attrs[counter].sample_period = default_interval; 1449 } 1450 1451 if (target_tid != -1) 1452 nr_cpus = 1; 1453 else 1454 nr_cpus = read_cpu_map(cpu_list); 1455 1456 if (nr_cpus < 1) 1457 usage_with_options(top_usage, options); 1458 1459 get_term_dimensions(&winsize); 1460 if (print_entries == 0) { 1461 update_print_entries(&winsize); 1462 signal(SIGWINCH, sig_winch_handler); 1463 } 1464 1465 return __cmd_top(); 1466} 1467