1/*- 2 * Copyright (c) 2014, 2015 Netflix Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer, 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28#include <sys/types.h> 29#include <stdio.h> 30#include <stdlib.h> 31#include <unistd.h> 32#include <string.h> 33#include <strings.h> 34#include <sys/errno.h> 35#include <signal.h> 36#include <sys/wait.h> 37#include <getopt.h> 38#include "eval_expr.h" 39__FBSDID("$FreeBSD: releng/10.3/usr.sbin/pmcstudy/pmcstudy.c 285849 2015-07-24 19:09:11Z emaste $"); 40 41#define MAX_COUNTER_SLOTS 1024 42#define MAX_NLEN 64 43#define MAX_CPU 64 44static int verbose = 0; 45 46extern char **environ; 47extern struct expression *master_exp; 48struct expression *master_exp=NULL; 49 50#define PMC_INITIAL_ALLOC 512 51extern char **valid_pmcs; 52char **valid_pmcs = NULL; 53extern int valid_pmc_cnt; 54int valid_pmc_cnt=0; 55extern int pmc_allocated_cnt; 56int pmc_allocated_cnt=0; 57 58/* 59 * The following two varients on popen and pclose with 60 * the cavet that they get you the PID so that you 61 * can supply it to pclose so it can send a SIGTERM 62 * to the process. 63 */ 64static FILE * 65my_popen(const char *command, const char *dir, pid_t *p_pid) 66{ 67 FILE *io_out, *io_in; 68 int pdesin[2], pdesout[2]; 69 char *argv[4]; 70 pid_t pid; 71 char cmd[4]; 72 char cmd2[1024]; 73 char arg1[4]; 74 75 if ((strcmp(dir, "r") != 0) && 76 (strcmp(dir, "w") != 0)) { 77 errno = EINVAL; 78 return(NULL); 79 } 80 if (pipe(pdesin) < 0) 81 return (NULL); 82 83 if (pipe(pdesout) < 0) { 84 (void)close(pdesin[0]); 85 (void)close(pdesin[1]); 86 return (NULL); 87 } 88 strcpy(cmd, "sh"); 89 strcpy(arg1, "-c"); 90 strcpy(cmd2, command); 91 argv[0] = cmd; 92 argv[1] = arg1; 93 argv[2] = cmd2; 94 argv[3] = NULL; 95 96 switch (pid = fork()) { 97 case -1: /* Error. */ 98 (void)close(pdesin[0]); 99 (void)close(pdesin[1]); 100 (void)close(pdesout[0]); 101 (void)close(pdesout[1]); 102 return (NULL); 103 /* NOTREACHED */ 104 case 0: /* Child. */ 105 /* Close out un-used sides */ 106 (void)close(pdesin[1]); 107 (void)close(pdesout[0]); 108 /* Now prepare the stdin of the process */ 109 close(0); 110 (void)dup(pdesin[0]); 111 (void)close(pdesin[0]); 112 /* Now prepare the stdout of the process */ 113 close(1); 114 (void)dup(pdesout[1]); 115 /* And lets do stderr just in case */ 116 close(2); 117 (void)dup(pdesout[1]); 118 (void)close(pdesout[1]); 119 /* Now run it */ 120 execve("/bin/sh", argv, environ); 121 exit(127); 122 /* NOTREACHED */ 123 } 124 /* Parent; assume fdopen can't fail. */ 125 /* Store the pid */ 126 *p_pid = pid; 127 if (strcmp(dir, "r") != 0) { 128 io_out = fdopen(pdesin[1], "w"); 129 (void)close(pdesin[0]); 130 (void)close(pdesout[0]); 131 (void)close(pdesout[1]); 132 return(io_out); 133 } else { 134 /* Prepare the input stream */ 135 io_in = fdopen(pdesout[0], "r"); 136 (void)close(pdesout[1]); 137 (void)close(pdesin[0]); 138 (void)close(pdesin[1]); 139 return (io_in); 140 } 141} 142 143/* 144 * pclose -- 145 * Pclose returns -1 if stream is not associated with a `popened' command, 146 * if already `pclosed', or waitpid returns an error. 147 */ 148static void 149my_pclose(FILE *io, pid_t the_pid) 150{ 151 int pstat; 152 pid_t pid; 153 154 /* 155 * Find the appropriate file pointer and remove it from the list. 156 */ 157 (void)fclose(io); 158 /* Die if you are not dead! */ 159 kill(the_pid, SIGTERM); 160 do { 161 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 162 } while (pid == -1 && errno == EINTR); 163} 164 165struct counters { 166 struct counters *next_cpu; 167 char counter_name[MAX_NLEN]; /* Name of counter */ 168 int cpu; /* CPU we are on */ 169 int pos; /* Index we are filling to. */ 170 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 171 uint64_t sum; /* Summary of entries */ 172}; 173 174extern struct counters *glob_cpu[MAX_CPU]; 175struct counters *glob_cpu[MAX_CPU]; 176 177extern struct counters *cnts; 178struct counters *cnts=NULL; 179 180extern int ncnts; 181int ncnts=0; 182 183extern int (*expression)(struct counters *, int); 184int (*expression)(struct counters *, int); 185 186static const char *threshold=NULL; 187static const char *command; 188 189struct cpu_entry { 190 const char *name; 191 const char *thresh; 192 const char *command; 193 int (*func)(struct counters *, int); 194}; 195 196 197struct cpu_type { 198 char cputype[32]; 199 int number; 200 struct cpu_entry *ents; 201 void (*explain)(const char *name); 202}; 203extern struct cpu_type the_cpu; 204struct cpu_type the_cpu; 205 206static void 207explain_name_sb(const char *name) 208{ 209 const char *mythresh; 210 if (strcmp(name, "allocstall1") == 0) { 211 printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 212 mythresh = "thresh > .05"; 213 } else if (strcmp(name, "allocstall2") == 0) { 214 printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 215 mythresh = "thresh > .05"; 216 } else if (strcmp(name, "br_miss") == 0) { 217 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 218 mythresh = "thresh >= .2"; 219 } else if (strcmp(name, "splitload") == 0) { 220 printf("Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 221 mythresh = "thresh >= .1"; 222 } else if (strcmp(name, "splitstore") == 0) { 223 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 224 mythresh = "thresh >= .01"; 225 } else if (strcmp(name, "contested") == 0) { 226 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 227 mythresh = "thresh >= .05"; 228 } else if (strcmp(name, "blockstorefwd") == 0) { 229 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 230 mythresh = "thresh >= .05"; 231 } else if (strcmp(name, "cache2") == 0) { 232 printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 233 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 234 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 235 printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 236 mythresh = "thresh >= .2"; 237 } else if (strcmp(name, "cache1") == 0) { 238 printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 239 mythresh = "thresh >= .2"; 240 } else if (strcmp(name, "dtlbmissload") == 0) { 241 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 242 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 243 mythresh = "thresh >= .1"; 244 } else if (strcmp(name, "frontendstall") == 0) { 245 printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 246 mythresh = "thresh >= .15"; 247 } else if (strcmp(name, "clears") == 0) { 248 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 249 printf(" MACHINE_CLEARS.SMC + \n"); 250 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 251 mythresh = "thresh >= .02"; 252 } else if (strcmp(name, "microassist") == 0) { 253 printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 254 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 255 mythresh = "thresh >= .05"; 256 } else if (strcmp(name, "aliasing_4k") == 0) { 257 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 258 mythresh = "thresh >= .1"; 259 } else if (strcmp(name, "fpassist") == 0) { 260 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 261 mythresh = "look for a excessive value"; 262 } else if (strcmp(name, "otherassistavx") == 0) { 263 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 264 mythresh = "look for a excessive value"; 265 } else if (strcmp(name, "otherassistsse") == 0) { 266 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267 mythresh = "look for a excessive value"; 268 } else if (strcmp(name, "eff1") == 0) { 269 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 270 mythresh = "thresh < .9"; 271 } else if (strcmp(name, "eff2") == 0) { 272 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 273 mythresh = "thresh > 1.0"; 274 } else if (strcmp(name, "dtlbmissstore") == 0) { 275 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 276 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 277 mythresh = "thresh >= .05"; 278 } else { 279 printf("Unknown name:%s\n", name); 280 mythresh = "unknown entry"; 281 } 282 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 283} 284 285static void 286explain_name_ib(const char *name) 287{ 288 const char *mythresh; 289 if (strcmp(name, "br_miss") == 0) { 290 printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 291 printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 292 printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 293 mythresh = "thresh >= .2"; 294 } else if (strcmp(name, "eff1") == 0) { 295 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 296 mythresh = "thresh < .9"; 297 } else if (strcmp(name, "eff2") == 0) { 298 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 299 mythresh = "thresh > 1.0"; 300 } else if (strcmp(name, "cache1") == 0) { 301 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 302 mythresh = "thresh >= .2"; 303 } else if (strcmp(name, "cache2") == 0) { 304 printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 305 mythresh = "thresh >= .2"; 306 } else if (strcmp(name, "itlbmiss") == 0) { 307 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 308 mythresh = "thresh > .05"; 309 } else if (strcmp(name, "icachemiss") == 0) { 310 printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 311 mythresh = "thresh > .05"; 312 } else if (strcmp(name, "lcpstall") == 0) { 313 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 314 mythresh = "thresh > .05"; 315 } else if (strcmp(name, "datashare") == 0) { 316 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 317 mythresh = "thresh > .05"; 318 } else if (strcmp(name, "blockstorefwd") == 0) { 319 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 320 mythresh = "thresh >= .05"; 321 } else if (strcmp(name, "splitload") == 0) { 322 printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 323 printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 324 mythresh = "thresh >= .1"; 325 } else if (strcmp(name, "splitstore") == 0) { 326 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 327 mythresh = "thresh >= .01"; 328 } else if (strcmp(name, "aliasing_4k") == 0) { 329 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 330 mythresh = "thresh >= .1"; 331 } else if (strcmp(name, "dtlbmissload") == 0) { 332 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 333 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 334 mythresh = "thresh >= .1"; 335 } else if (strcmp(name, "dtlbmissstore") == 0) { 336 printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 337 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 338 mythresh = "thresh >= .05"; 339 } else if (strcmp(name, "contested") == 0) { 340 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 341 mythresh = "thresh >= .05"; 342 } else if (strcmp(name, "clears") == 0) { 343 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 344 printf(" MACHINE_CLEARS.SMC + \n"); 345 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 346 mythresh = "thresh >= .02"; 347 } else if (strcmp(name, "microassist") == 0) { 348 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 349 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 350 mythresh = "thresh >= .05"; 351 } else if (strcmp(name, "fpassist") == 0) { 352 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 353 mythresh = "look for a excessive value"; 354 } else if (strcmp(name, "otherassistavx") == 0) { 355 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 356 mythresh = "look for a excessive value"; 357 } else if (strcmp(name, "otherassistsse") == 0) { 358 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359 mythresh = "look for a excessive value"; 360 } else { 361 printf("Unknown name:%s\n", name); 362 mythresh = "unknown entry"; 363 } 364 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 365} 366 367 368static void 369explain_name_has(const char *name) 370{ 371 const char *mythresh; 372 if (strcmp(name, "eff1") == 0) { 373 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 374 mythresh = "thresh < .75"; 375 } else if (strcmp(name, "eff2") == 0) { 376 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 377 mythresh = "thresh > 1.0"; 378 } else if (strcmp(name, "itlbmiss") == 0) { 379 printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 380 mythresh = "thresh > .05"; 381 } else if (strcmp(name, "icachemiss") == 0) { 382 printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 383 mythresh = "thresh > .05"; 384 } else if (strcmp(name, "lcpstall") == 0) { 385 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 386 mythresh = "thresh > .05"; 387 } else if (strcmp(name, "cache1") == 0) { 388 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 389 mythresh = "thresh >= .2"; 390 } else if (strcmp(name, "cache2") == 0) { 391 printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 392 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 393 printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 394 printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 395 mythresh = "thresh >= .2"; 396 } else if (strcmp(name, "contested") == 0) { 397 printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 398 mythresh = "thresh >= .05"; 399 } else if (strcmp(name, "datashare") == 0) { 400 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 401 mythresh = "thresh > .05"; 402 } else if (strcmp(name, "blockstorefwd") == 0) { 403 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 404 mythresh = "thresh >= .05"; 405 } else if (strcmp(name, "splitload") == 0) { 406 printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 407 mythresh = "thresh >= .1"; 408 } else if (strcmp(name, "splitstore") == 0) { 409 printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 410 mythresh = "thresh >= .01"; 411 } else if (strcmp(name, "aliasing_4k") == 0) { 412 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 413 mythresh = "thresh >= .1"; 414 } else if (strcmp(name, "dtlbmissload") == 0) { 415 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 416 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 417 mythresh = "thresh >= .1"; 418 } else if (strcmp(name, "br_miss") == 0) { 419 printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 420 mythresh = "thresh >= .2"; 421 } else if (strcmp(name, "clears") == 0) { 422 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 423 printf(" MACHINE_CLEARS.SMC + \n"); 424 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 425 mythresh = "thresh >= .02"; 426 } else if (strcmp(name, "microassist") == 0) { 427 printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 428 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 429 mythresh = "thresh >= .05"; 430 } else if (strcmp(name, "fpassist") == 0) { 431 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 432 mythresh = "look for a excessive value"; 433 } else if (strcmp(name, "otherassistavx") == 0) { 434 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 435 mythresh = "look for a excessive value"; 436 } else if (strcmp(name, "otherassistsse") == 0) { 437 printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438 mythresh = "look for a excessive value"; 439 } else { 440 printf("Unknown name:%s\n", name); 441 mythresh = "unknown entry"; 442 } 443 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 444} 445 446 447static struct counters * 448find_counter(struct counters *base, const char *name) 449{ 450 struct counters *at; 451 int len; 452 453 at = base; 454 len = strlen(name); 455 while(at) { 456 if (strncmp(at->counter_name, name, len) == 0) { 457 return(at); 458 } 459 at = at->next_cpu; 460 } 461 printf("Can't find counter %s\n", name); 462 printf("We have:\n"); 463 at = base; 464 while(at) { 465 printf("- %s\n", at->counter_name); 466 at = at->next_cpu; 467 } 468 exit(-1); 469} 470 471static int 472allocstall1(struct counters *cpu, int pos) 473{ 474/* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 475 int ret; 476 struct counters *partial; 477 struct counters *unhalt; 478 double un, par, res; 479 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 480 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 481 if (pos != -1) { 482 par = partial->vals[pos] * 1.0; 483 un = unhalt->vals[pos] * 1.0; 484 } else { 485 par = partial->sum * 1.0; 486 un = unhalt->sum * 1.0; 487 } 488 res = par/un; 489 ret = printf("%1.3f", res); 490 return(ret); 491} 492 493static int 494allocstall2(struct counters *cpu, int pos) 495{ 496/* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 497 int ret; 498 struct counters *partial; 499 struct counters *unhalt; 500 double un, par, res; 501 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 502 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 503 if (pos != -1) { 504 par = partial->vals[pos] * 1.0; 505 un = unhalt->vals[pos] * 1.0; 506 } else { 507 par = partial->sum * 1.0; 508 un = unhalt->sum * 1.0; 509 } 510 res = par/un; 511 ret = printf("%1.3f", res); 512 return(ret); 513} 514 515static int 516br_mispredict(struct counters *cpu, int pos) 517{ 518 struct counters *brctr; 519 struct counters *unhalt; 520 int ret; 521/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 522 double br, un, con, res; 523 con = 20.0; 524 525 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 526 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 527 if (pos != -1) { 528 br = brctr->vals[pos] * 1.0; 529 un = unhalt->vals[pos] * 1.0; 530 } else { 531 br = brctr->sum * 1.0; 532 un = unhalt->sum * 1.0; 533 } 534 res = (con * br)/un; 535 ret = printf("%1.3f", res); 536 return(ret); 537} 538 539static int 540br_mispredictib(struct counters *cpu, int pos) 541{ 542 struct counters *brctr; 543 struct counters *unhalt; 544 struct counters *clear, *clear2, *clear3; 545 struct counters *uops; 546 struct counters *recv; 547 struct counters *iss; 548/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 549 int ret; 550 /* 551 * (BR_MISP_RETIRED.ALL_BRANCHES / 552 * (BR_MISP_RETIRED.ALL_BRANCHES + 553 * MACHINE_CLEAR.COUNT) * 554 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 555 * 556 */ 557 double br, cl, cl2, cl3, uo, re, un, con, res, is; 558 con = 4.0; 559 560 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 561 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 562 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 563 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 564 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 565 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 566 iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 567 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 568 if (pos != -1) { 569 br = brctr->vals[pos] * 1.0; 570 cl = clear->vals[pos] * 1.0; 571 cl2 = clear2->vals[pos] * 1.0; 572 cl3 = clear3->vals[pos] * 1.0; 573 uo = uops->vals[pos] * 1.0; 574 re = recv->vals[pos] * 1.0; 575 is = iss->vals[pos] * 1.0; 576 un = unhalt->vals[pos] * 1.0; 577 } else { 578 br = brctr->sum * 1.0; 579 cl = clear->sum * 1.0; 580 cl2 = clear2->sum * 1.0; 581 cl3 = clear3->sum * 1.0; 582 uo = uops->sum * 1.0; 583 re = recv->sum * 1.0; 584 is = iss->sum * 1.0; 585 un = unhalt->sum * 1.0; 586 } 587 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 588 ret = printf("%1.3f", res); 589 return(ret); 590} 591 592static int 593splitloadib(struct counters *cpu, int pos) 594{ 595 int ret; 596 struct counters *mem; 597 struct counters *l1d, *ldblock; 598 struct counters *unhalt; 599 double un, memd, res, l1, ldb; 600 /* 601 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 602 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 603 */ 604 605 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 606 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 607 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 608 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 609 if (pos != -1) { 610 memd = mem->vals[pos] * 1.0; 611 l1 = l1d->vals[pos] * 1.0; 612 ldb = ldblock->vals[pos] * 1.0; 613 un = unhalt->vals[pos] * 1.0; 614 } else { 615 memd = mem->sum * 1.0; 616 l1 = l1d->sum * 1.0; 617 ldb = ldblock->sum * 1.0; 618 un = unhalt->sum * 1.0; 619 } 620 res = ((l1 / memd) * ldb)/un; 621 ret = printf("%1.3f", res); 622 return(ret); 623} 624 625static int 626splitload(struct counters *cpu, int pos) 627{ 628 int ret; 629 struct counters *mem; 630 struct counters *unhalt; 631 double con, un, memd, res; 632/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 633 634 con = 5.0; 635 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 636 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 637 if (pos != -1) { 638 memd = mem->vals[pos] * 1.0; 639 un = unhalt->vals[pos] * 1.0; 640 } else { 641 memd = mem->sum * 1.0; 642 un = unhalt->sum * 1.0; 643 } 644 res = (memd * con)/un; 645 ret = printf("%1.3f", res); 646 return(ret); 647} 648 649static int 650splitstore(struct counters *cpu, int pos) 651{ 652 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 653 int ret; 654 struct counters *mem_split; 655 struct counters *mem_stores; 656 double memsplit, memstore, res; 657 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 658 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 659 if (pos != -1) { 660 memsplit = mem_split->vals[pos] * 1.0; 661 memstore = mem_stores->vals[pos] * 1.0; 662 } else { 663 memsplit = mem_split->sum * 1.0; 664 memstore = mem_stores->sum * 1.0; 665 } 666 res = memsplit/memstore; 667 ret = printf("%1.3f", res); 668 return(ret); 669} 670 671 672static int 673contested(struct counters *cpu, int pos) 674{ 675 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 676 int ret; 677 struct counters *mem; 678 struct counters *unhalt; 679 double con, un, memd, res; 680 681 con = 60.0; 682 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 683 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 684 if (pos != -1) { 685 memd = mem->vals[pos] * 1.0; 686 un = unhalt->vals[pos] * 1.0; 687 } else { 688 memd = mem->sum * 1.0; 689 un = unhalt->sum * 1.0; 690 } 691 res = (memd * con)/un; 692 ret = printf("%1.3f", res); 693 return(ret); 694} 695 696static int 697contested_has(struct counters *cpu, int pos) 698{ 699 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 700 int ret; 701 struct counters *mem; 702 struct counters *unhalt; 703 double con, un, memd, res; 704 705 con = 84.0; 706 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 707 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 708 if (pos != -1) { 709 memd = mem->vals[pos] * 1.0; 710 un = unhalt->vals[pos] * 1.0; 711 } else { 712 memd = mem->sum * 1.0; 713 un = unhalt->sum * 1.0; 714 } 715 res = (memd * con)/un; 716 ret = printf("%1.3f", res); 717 return(ret); 718} 719 720 721static int 722blockstoreforward(struct counters *cpu, int pos) 723{ 724 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 725 int ret; 726 struct counters *ldb; 727 struct counters *unhalt; 728 double con, un, ld, res; 729 730 con = 13.0; 731 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 732 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 733 if (pos != -1) { 734 ld = ldb->vals[pos] * 1.0; 735 un = unhalt->vals[pos] * 1.0; 736 } else { 737 ld = ldb->sum * 1.0; 738 un = unhalt->sum * 1.0; 739 } 740 res = (ld * con)/un; 741 ret = printf("%1.3f", res); 742 return(ret); 743} 744 745static int 746cache2(struct counters *cpu, int pos) 747{ 748 /* ** Suspect *** 749 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 750 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 751 */ 752 int ret; 753 struct counters *mem1, *mem2, *mem3; 754 struct counters *unhalt; 755 double con1, con2, con3, un, me_1, me_2, me_3, res; 756 757 con1 = 26.0; 758 con2 = 43.0; 759 con3 = 60.0; 760 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 761/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 762 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 763 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 764 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 765 if (pos != -1) { 766 me_1 = mem1->vals[pos] * 1.0; 767 me_2 = mem2->vals[pos] * 1.0; 768 me_3 = mem3->vals[pos] * 1.0; 769 un = unhalt->vals[pos] * 1.0; 770 } else { 771 me_1 = mem1->sum * 1.0; 772 me_2 = mem2->sum * 1.0; 773 me_3 = mem3->sum * 1.0; 774 un = unhalt->sum * 1.0; 775 } 776 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 777 ret = printf("%1.3f", res); 778 return(ret); 779} 780 781static int 782datasharing(struct counters *cpu, int pos) 783{ 784 /* 785 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 786 */ 787 int ret; 788 struct counters *mem; 789 struct counters *unhalt; 790 double con, res, me, un; 791 792 con = 43.0; 793 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 794 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 795 if (pos != -1) { 796 me = mem->vals[pos] * 1.0; 797 un = unhalt->vals[pos] * 1.0; 798 } else { 799 me = mem->sum * 1.0; 800 un = unhalt->sum * 1.0; 801 } 802 res = (me * con)/un; 803 ret = printf("%1.3f", res); 804 return(ret); 805 806} 807 808 809static int 810datasharing_has(struct counters *cpu, int pos) 811{ 812 /* 813 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 814 */ 815 int ret; 816 struct counters *mem; 817 struct counters *unhalt; 818 double con, res, me, un; 819 820 con = 72.0; 821 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 822 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 823 if (pos != -1) { 824 me = mem->vals[pos] * 1.0; 825 un = unhalt->vals[pos] * 1.0; 826 } else { 827 me = mem->sum * 1.0; 828 un = unhalt->sum * 1.0; 829 } 830 res = (me * con)/un; 831 ret = printf("%1.3f", res); 832 return(ret); 833 834} 835 836 837static int 838cache2ib(struct counters *cpu, int pos) 839{ 840 /* 841 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 842 */ 843 int ret; 844 struct counters *mem; 845 struct counters *unhalt; 846 double con, un, me, res; 847 848 con = 29.0; 849 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 850 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 851 if (pos != -1) { 852 me = mem->vals[pos] * 1.0; 853 un = unhalt->vals[pos] * 1.0; 854 } else { 855 me = mem->sum * 1.0; 856 un = unhalt->sum * 1.0; 857 } 858 res = (con * me)/un; 859 ret = printf("%1.3f", res); 860 return(ret); 861} 862 863static int 864cache2has(struct counters *cpu, int pos) 865{ 866 /* 867 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 868 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 869 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 870 * / CPU_CLK_UNHALTED.THREAD_P 871 */ 872 int ret; 873 struct counters *mem1, *mem2, *mem3; 874 struct counters *unhalt; 875 double con1, con2, con3, un, me1, me2, me3, res; 876 877 con1 = 36.0; 878 con2 = 72.0; 879 con3 = 84.0; 880 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 881 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 882 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 883 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 884 if (pos != -1) { 885 me1 = mem1->vals[pos] * 1.0; 886 me2 = mem2->vals[pos] * 1.0; 887 me3 = mem3->vals[pos] * 1.0; 888 un = unhalt->vals[pos] * 1.0; 889 } else { 890 me1 = mem1->sum * 1.0; 891 me2 = mem2->sum * 1.0; 892 me3 = mem3->sum * 1.0; 893 un = unhalt->sum * 1.0; 894 } 895 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 896 ret = printf("%1.3f", res); 897 return(ret); 898} 899 900static int 901cache1(struct counters *cpu, int pos) 902{ 903 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 904 int ret; 905 struct counters *mem; 906 struct counters *unhalt; 907 double con, un, me, res; 908 909 con = 180.0; 910 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 911 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 912 if (pos != -1) { 913 me = mem->vals[pos] * 1.0; 914 un = unhalt->vals[pos] * 1.0; 915 } else { 916 me = mem->sum * 1.0; 917 un = unhalt->sum * 1.0; 918 } 919 res = (me * con)/un; 920 ret = printf("%1.3f", res); 921 return(ret); 922} 923 924static int 925cache1ib(struct counters *cpu, int pos) 926{ 927 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 928 int ret; 929 struct counters *mem; 930 struct counters *unhalt; 931 double con, un, me, res; 932 933 con = 180.0; 934 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 935 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 936 if (pos != -1) { 937 me = mem->vals[pos] * 1.0; 938 un = unhalt->vals[pos] * 1.0; 939 } else { 940 me = mem->sum * 1.0; 941 un = unhalt->sum * 1.0; 942 } 943 res = (me * con)/un; 944 ret = printf("%1.3f", res); 945 return(ret); 946} 947 948 949static int 950dtlb_missload(struct counters *cpu, int pos) 951{ 952 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 953 int ret; 954 struct counters *dtlb_m, *dtlb_d; 955 struct counters *unhalt; 956 double con, un, d1, d2, res; 957 958 con = 7.0; 959 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 960 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 961 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 962 if (pos != -1) { 963 d1 = dtlb_m->vals[pos] * 1.0; 964 d2 = dtlb_d->vals[pos] * 1.0; 965 un = unhalt->vals[pos] * 1.0; 966 } else { 967 d1 = dtlb_m->sum * 1.0; 968 d2 = dtlb_d->sum * 1.0; 969 un = unhalt->sum * 1.0; 970 } 971 res = ((d1 * con) + d2)/un; 972 ret = printf("%1.3f", res); 973 return(ret); 974} 975 976static int 977dtlb_missstore(struct counters *cpu, int pos) 978{ 979 /* 980 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 981 * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 982 */ 983 int ret; 984 struct counters *dtsb_m, *dtsb_d; 985 struct counters *unhalt; 986 double con, un, d1, d2, res; 987 988 con = 7.0; 989 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 990 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 991 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 992 if (pos != -1) { 993 d1 = dtsb_m->vals[pos] * 1.0; 994 d2 = dtsb_d->vals[pos] * 1.0; 995 un = unhalt->vals[pos] * 1.0; 996 } else { 997 d1 = dtsb_m->sum * 1.0; 998 d2 = dtsb_d->sum * 1.0; 999 un = unhalt->sum * 1.0; 1000 } 1001 res = ((d1 * con) + d2)/un; 1002 ret = printf("%1.3f", res); 1003 return(ret); 1004} 1005 1006static int 1007itlb_miss(struct counters *cpu, int pos) 1008{ 1009 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1010 int ret; 1011 struct counters *itlb; 1012 struct counters *unhalt; 1013 double un, d1, res; 1014 1015 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1016 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1017 if (pos != -1) { 1018 d1 = itlb->vals[pos] * 1.0; 1019 un = unhalt->vals[pos] * 1.0; 1020 } else { 1021 d1 = itlb->sum * 1.0; 1022 un = unhalt->sum * 1.0; 1023 } 1024 res = d1/un; 1025 ret = printf("%1.3f", res); 1026 return(ret); 1027} 1028 1029static int 1030icache_miss(struct counters *cpu, int pos) 1031{ 1032 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1033 1034 int ret; 1035 struct counters *itlb, *icache; 1036 struct counters *unhalt; 1037 double un, d1, ic, res; 1038 1039 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1040 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1041 icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1042 if (pos != -1) { 1043 d1 = itlb->vals[pos] * 1.0; 1044 ic = icache->vals[pos] * 1.0; 1045 un = unhalt->vals[pos] * 1.0; 1046 } else { 1047 d1 = itlb->sum * 1.0; 1048 ic = icache->sum * 1.0; 1049 un = unhalt->sum * 1.0; 1050 } 1051 res = (ic-d1)/un; 1052 ret = printf("%1.3f", res); 1053 return(ret); 1054 1055} 1056 1057static int 1058icache_miss_has(struct counters *cpu, int pos) 1059{ 1060 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1061 1062 int ret; 1063 struct counters *icache; 1064 struct counters *unhalt; 1065 double un, con, ic, res; 1066 1067 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1068 icache = find_counter(cpu, "ICACHE.MISSES"); 1069 con = 36.0; 1070 if (pos != -1) { 1071 ic = icache->vals[pos] * 1.0; 1072 un = unhalt->vals[pos] * 1.0; 1073 } else { 1074 ic = icache->sum * 1.0; 1075 un = unhalt->sum * 1.0; 1076 } 1077 res = (con * ic)/un; 1078 ret = printf("%1.3f", res); 1079 return(ret); 1080 1081} 1082 1083static int 1084lcp_stall(struct counters *cpu, int pos) 1085{ 1086 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1087 int ret; 1088 struct counters *ild; 1089 struct counters *unhalt; 1090 double un, d1, res; 1091 1092 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1093 ild = find_counter(cpu, "ILD_STALL.LCP"); 1094 if (pos != -1) { 1095 d1 = ild->vals[pos] * 1.0; 1096 un = unhalt->vals[pos] * 1.0; 1097 } else { 1098 d1 = ild->sum * 1.0; 1099 un = unhalt->sum * 1.0; 1100 } 1101 res = d1/un; 1102 ret = printf("%1.3f", res); 1103 return(ret); 1104 1105} 1106 1107 1108static int 1109frontendstall(struct counters *cpu, int pos) 1110{ 1111 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1112 int ret; 1113 struct counters *idq; 1114 struct counters *unhalt; 1115 double con, un, id, res; 1116 1117 con = 4.0; 1118 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1119 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1120 if (pos != -1) { 1121 id = idq->vals[pos] * 1.0; 1122 un = unhalt->vals[pos] * 1.0; 1123 } else { 1124 id = idq->sum * 1.0; 1125 un = unhalt->sum * 1.0; 1126 } 1127 res = id/(un * con); 1128 ret = printf("%1.3f", res); 1129 return(ret); 1130} 1131 1132static int 1133clears(struct counters *cpu, int pos) 1134{ 1135 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1136 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1137 1138 int ret; 1139 struct counters *clr1, *clr2, *clr3; 1140 struct counters *unhalt; 1141 double con, un, cl1, cl2, cl3, res; 1142 1143 con = 100.0; 1144 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1145 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1146 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1147 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1148 1149 if (pos != -1) { 1150 cl1 = clr1->vals[pos] * 1.0; 1151 cl2 = clr2->vals[pos] * 1.0; 1152 cl3 = clr3->vals[pos] * 1.0; 1153 un = unhalt->vals[pos] * 1.0; 1154 } else { 1155 cl1 = clr1->sum * 1.0; 1156 cl2 = clr2->sum * 1.0; 1157 cl3 = clr3->sum * 1.0; 1158 un = unhalt->sum * 1.0; 1159 } 1160 res = ((cl1 + cl2 + cl3) * con)/un; 1161 ret = printf("%1.3f", res); 1162 return(ret); 1163} 1164 1165static int 1166microassist(struct counters *cpu, int pos) 1167{ 1168 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1169 int ret; 1170 struct counters *idq; 1171 struct counters *unhalt; 1172 double un, id, res, con; 1173 1174 con = 4.0; 1175 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1176 idq = find_counter(cpu, "IDQ.MS_UOPS"); 1177 if (pos != -1) { 1178 id = idq->vals[pos] * 1.0; 1179 un = unhalt->vals[pos] * 1.0; 1180 } else { 1181 id = idq->sum * 1.0; 1182 un = unhalt->sum * 1.0; 1183 } 1184 res = id/(un * con); 1185 ret = printf("%1.3f", res); 1186 return(ret); 1187} 1188 1189 1190static int 1191aliasing(struct counters *cpu, int pos) 1192{ 1193 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1194 int ret; 1195 struct counters *ld; 1196 struct counters *unhalt; 1197 double un, lds, con, res; 1198 1199 con = 5.0; 1200 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1201 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1202 if (pos != -1) { 1203 lds = ld->vals[pos] * 1.0; 1204 un = unhalt->vals[pos] * 1.0; 1205 } else { 1206 lds = ld->sum * 1.0; 1207 un = unhalt->sum * 1.0; 1208 } 1209 res = (lds * con)/un; 1210 ret = printf("%1.3f", res); 1211 return(ret); 1212} 1213 1214static int 1215fpassists(struct counters *cpu, int pos) 1216{ 1217 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1218 int ret; 1219 struct counters *fp; 1220 struct counters *inst; 1221 double un, fpd, res; 1222 1223 inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1224 fp = find_counter(cpu, "FP_ASSIST.ANY"); 1225 if (pos != -1) { 1226 fpd = fp->vals[pos] * 1.0; 1227 un = inst->vals[pos] * 1.0; 1228 } else { 1229 fpd = fp->sum * 1.0; 1230 un = inst->sum * 1.0; 1231 } 1232 res = fpd/un; 1233 ret = printf("%1.3f", res); 1234 return(ret); 1235} 1236 1237static int 1238otherassistavx(struct counters *cpu, int pos) 1239{ 1240 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1241 int ret; 1242 struct counters *oth; 1243 struct counters *unhalt; 1244 double un, ot, con, res; 1245 1246 con = 75.0; 1247 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1248 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1249 if (pos != -1) { 1250 ot = oth->vals[pos] * 1.0; 1251 un = unhalt->vals[pos] * 1.0; 1252 } else { 1253 ot = oth->sum * 1.0; 1254 un = unhalt->sum * 1.0; 1255 } 1256 res = (ot * con)/un; 1257 ret = printf("%1.3f", res); 1258 return(ret); 1259} 1260 1261static int 1262otherassistsse(struct counters *cpu, int pos) 1263{ 1264 1265 int ret; 1266 struct counters *oth; 1267 struct counters *unhalt; 1268 double un, ot, con, res; 1269 1270 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1271 con = 75.0; 1272 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1273 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1274 if (pos != -1) { 1275 ot = oth->vals[pos] * 1.0; 1276 un = unhalt->vals[pos] * 1.0; 1277 } else { 1278 ot = oth->sum * 1.0; 1279 un = unhalt->sum * 1.0; 1280 } 1281 res = (ot * con)/un; 1282 ret = printf("%1.3f", res); 1283 return(ret); 1284} 1285 1286static int 1287efficiency1(struct counters *cpu, int pos) 1288{ 1289 1290 int ret; 1291 struct counters *uops; 1292 struct counters *unhalt; 1293 double un, ot, con, res; 1294 1295 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1296 con = 4.0; 1297 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1298 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1299 if (pos != -1) { 1300 ot = uops->vals[pos] * 1.0; 1301 un = unhalt->vals[pos] * 1.0; 1302 } else { 1303 ot = uops->sum * 1.0; 1304 un = unhalt->sum * 1.0; 1305 } 1306 res = ot/(con * un); 1307 ret = printf("%1.3f", res); 1308 return(ret); 1309} 1310 1311static int 1312efficiency2(struct counters *cpu, int pos) 1313{ 1314 1315 int ret; 1316 struct counters *uops; 1317 struct counters *unhalt; 1318 double un, ot, res; 1319 1320 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1321 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1322 uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1323 if (pos != -1) { 1324 ot = uops->vals[pos] * 1.0; 1325 un = unhalt->vals[pos] * 1.0; 1326 } else { 1327 ot = uops->sum * 1.0; 1328 un = unhalt->sum * 1.0; 1329 } 1330 res = un/ot; 1331 ret = printf("%1.3f", res); 1332 return(ret); 1333} 1334 1335#define SANDY_BRIDGE_COUNT 20 1336static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1337/*01*/ { "allocstall1", "thresh > .05", 1338 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1339 allocstall1 }, 1340/*02*/ { "allocstall2", "thresh > .05", 1341 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", 1342 allocstall2 }, 1343/*03*/ { "br_miss", "thresh >= .2", 1344 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1345 br_mispredict }, 1346/*04*/ { "splitload", "thresh >= .1", 1347 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1348 splitload }, 1349/*05*/ { "splitstore", "thresh >= .01", 1350 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1351 splitstore }, 1352/*06*/ { "contested", "thresh >= .05", 1353 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1354 contested }, 1355/*07*/ { "blockstorefwd", "thresh >= .05", 1356 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1357 blockstoreforward }, 1358/*08*/ { "cache2", "thresh >= .2", 1359 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1360 cache2 }, 1361/*09*/ { "cache1", "thresh >= .2", 1362 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1363 cache1 }, 1364/*10*/ { "dtlbmissload", "thresh >= .1", 1365 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1366 dtlb_missload }, 1367/*11*/ { "dtlbmissstore", "thresh >= .05", 1368 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1369 dtlb_missstore }, 1370/*12*/ { "frontendstall", "thresh >= .15", 1371 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1372 frontendstall }, 1373/*13*/ { "clears", "thresh >= .02", 1374 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1375 clears }, 1376/*14*/ { "microassist", "thresh >= .05", 1377 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1378 microassist }, 1379/*15*/ { "aliasing_4k", "thresh >= .1", 1380 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1381 aliasing }, 1382/*16*/ { "fpassist", "look for a excessive value", 1383 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1384 fpassists }, 1385/*17*/ { "otherassistavx", "look for a excessive value", 1386 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1387 otherassistavx }, 1388/*18*/ { "otherassistsse", "look for a excessive value", 1389 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1390 otherassistsse }, 1391/*19*/ { "eff1", "thresh < .9", 1392 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1393 efficiency1 }, 1394/*20*/ { "eff2", "thresh > 1.0", 1395 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1396 efficiency2 }, 1397}; 1398 1399 1400#define IVY_BRIDGE_COUNT 21 1401static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1402/*1*/ { "eff1", "thresh < .75", 1403 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1404 efficiency1 }, 1405/*2*/ { "eff2", "thresh > 1.0", 1406 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1407 efficiency2 }, 1408/*3*/ { "itlbmiss", "thresh > .05", 1409 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1410 itlb_miss }, 1411/*4*/ { "icachemiss", "thresh > .05", 1412 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1413 icache_miss }, 1414/*5*/ { "lcpstall", "thresh > .05", 1415 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1416 lcp_stall }, 1417/*6*/ { "cache1", "thresh >= .2", 1418 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1419 cache1ib }, 1420/*7*/ { "cache2", "thresh >= .2", 1421 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1422 cache2ib }, 1423/*8*/ { "contested", "thresh >= .05", 1424 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1425 contested }, 1426/*9*/ { "datashare", "thresh >= .05", 1427 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1428 datasharing }, 1429/*10*/ { "blockstorefwd", "thresh >= .05", 1430 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1431 blockstoreforward }, 1432/*11*/ { "splitload", "thresh >= .1", 1433 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1434 splitloadib }, 1435/*12*/ { "splitstore", "thresh >= .01", 1436 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1437 splitstore }, 1438/*13*/ { "aliasing_4k", "thresh >= .1", 1439 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1440 aliasing }, 1441/*14*/ { "dtlbmissload", "thresh >= .1", 1442 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1443 dtlb_missload }, 1444/*15*/ { "dtlbmissstore", "thresh >= .05", 1445 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1446 dtlb_missstore }, 1447/*16*/ { "br_miss", "thresh >= .2", 1448 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1449 br_mispredictib }, 1450/*17*/ { "clears", "thresh >= .02", 1451 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1452 clears }, 1453/*18*/ { "microassist", "thresh >= .05", 1454 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1455 microassist }, 1456/*19*/ { "fpassist", "look for a excessive value", 1457 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1458 fpassists }, 1459/*20*/ { "otherassistavx", "look for a excessive value", 1460 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1461 otherassistavx }, 1462/*21*/ { "otherassistsse", "look for a excessive value", 1463 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1464 otherassistsse }, 1465}; 1466 1467#define HASWELL_COUNT 20 1468static struct cpu_entry haswell[HASWELL_COUNT] = { 1469/*1*/ { "eff1", "thresh < .75", 1470 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1471 efficiency1 }, 1472/*2*/ { "eff2", "thresh > 1.0", 1473 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1474 efficiency2 }, 1475/*3*/ { "itlbmiss", "thresh > .05", 1476 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1477 itlb_miss }, 1478/*4*/ { "icachemiss", "thresh > .05", 1479 "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", 1480 icache_miss_has }, 1481/*5*/ { "lcpstall", "thresh > .05", 1482 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1483 lcp_stall }, 1484/*6*/ { "cache1", "thresh >= .2", 1485 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1486 cache1ib }, 1487/*7*/ { "cache2", "thresh >= .2", 1488 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1489 cache2has }, 1490/*8*/ { "contested", "thresh >= .05", 1491 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1492 contested_has }, 1493/*9*/ { "datashare", "thresh >= .05", 1494 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1495 datasharing_has }, 1496/*10*/ { "blockstorefwd", "thresh >= .05", 1497 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1498 blockstoreforward }, 1499/*11*/ { "splitload", "thresh >= .1", 1500 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1501 splitload }, 1502/*12*/ { "splitstore", "thresh >= .01", 1503 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1504 splitstore }, 1505/*13*/ { "aliasing_4k", "thresh >= .1", 1506 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1507 aliasing }, 1508/*14*/ { "dtlbmissload", "thresh >= .1", 1509 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1510 dtlb_missload }, 1511/*15*/ { "br_miss", "thresh >= .2", 1512 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1513 br_mispredict }, 1514/*16*/ { "clears", "thresh >= .02", 1515 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1516 clears }, 1517/*17*/ { "microassist", "thresh >= .05", 1518 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1519 microassist }, 1520/*18*/ { "fpassist", "look for a excessive value", 1521 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1522 fpassists }, 1523/*19*/ { "otherassistavx", "look for a excessive value", 1524 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1525 otherassistavx }, 1526/*20*/ { "otherassistsse", "look for a excessive value", 1527 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1528 otherassistsse }, 1529}; 1530 1531 1532static void 1533set_sandybridge(void) 1534{ 1535 strcpy(the_cpu.cputype, "SandyBridge PMC"); 1536 the_cpu.number = SANDY_BRIDGE_COUNT; 1537 the_cpu.ents = sandy_bridge; 1538 the_cpu.explain = explain_name_sb; 1539} 1540 1541static void 1542set_ivybridge(void) 1543{ 1544 strcpy(the_cpu.cputype, "IvyBridge PMC"); 1545 the_cpu.number = IVY_BRIDGE_COUNT; 1546 the_cpu.ents = ivy_bridge; 1547 the_cpu.explain = explain_name_ib; 1548} 1549 1550 1551static void 1552set_haswell(void) 1553{ 1554 strcpy(the_cpu.cputype, "HASWELL PMC"); 1555 the_cpu.number = HASWELL_COUNT; 1556 the_cpu.ents = haswell; 1557 the_cpu.explain = explain_name_has; 1558} 1559 1560static void 1561set_expression(char *name) 1562{ 1563 int found = 0, i; 1564 for(i=0 ; i< the_cpu.number; i++) { 1565 if (strcmp(name, the_cpu.ents[i].name) == 0) { 1566 found = 1; 1567 expression = the_cpu.ents[i].func; 1568 command = the_cpu.ents[i].command; 1569 threshold = the_cpu.ents[i].thresh; 1570 break; 1571 } 1572 } 1573 if (!found) { 1574 printf("For CPU type %s we have no expression:%s\n", 1575 the_cpu.cputype, name); 1576 exit(-1); 1577 } 1578} 1579 1580 1581 1582 1583 1584static int 1585validate_expression(char *name) 1586{ 1587 int i, found; 1588 1589 found = 0; 1590 for(i=0 ; i< the_cpu.number; i++) { 1591 if (strcmp(name, the_cpu.ents[i].name) == 0) { 1592 found = 1; 1593 break; 1594 } 1595 } 1596 if (!found) { 1597 return(-1); 1598 } 1599 return (0); 1600} 1601 1602static void 1603do_expression(struct counters *cpu, int pos) 1604{ 1605 if (expression == NULL) 1606 return; 1607 (*expression)(cpu, pos); 1608} 1609 1610static void 1611process_header(int idx, char *p) 1612{ 1613 struct counters *up; 1614 int i, len, nlen; 1615 /* 1616 * Given header element idx, at p in 1617 * form 's/NN/nameof' 1618 * process the entry to pull out the name and 1619 * the CPU number. 1620 */ 1621 if (strncmp(p, "s/", 2)) { 1622 printf("Check -- invalid header no s/ in %s\n", 1623 p); 1624 return; 1625 } 1626 up = &cnts[idx]; 1627 up->cpu = strtol(&p[2], NULL, 10); 1628 len = strlen(p); 1629 for (i=2; i<len; i++) { 1630 if (p[i] == '/') { 1631 nlen = strlen(&p[(i+1)]); 1632 if (nlen < (MAX_NLEN-1)) { 1633 strcpy(up->counter_name, &p[(i+1)]); 1634 } else { 1635 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 1636 } 1637 } 1638 } 1639} 1640 1641static void 1642build_counters_from_header(FILE *io) 1643{ 1644 char buffer[8192], *p; 1645 int i, len, cnt; 1646 size_t mlen; 1647 1648 /* We have a new start, lets 1649 * setup our headers and cpus. 1650 */ 1651 if (fgets(buffer, sizeof(buffer), io) == NULL) { 1652 printf("First line can't be read from file err:%d\n", errno); 1653 return; 1654 } 1655 /* 1656 * Ok output is an array of counters. Once 1657 * we start to read the values in we must 1658 * put them in there slot to match there CPU and 1659 * counter being updated. We create a mass array 1660 * of the counters, filling in the CPU and 1661 * counter name. 1662 */ 1663 /* How many do we get? */ 1664 len = strlen(buffer); 1665 for (i=0, cnt=0; i<len; i++) { 1666 if (strncmp(&buffer[i], "s/", 2) == 0) { 1667 cnt++; 1668 for(;i<len;i++) { 1669 if (buffer[i] == ' ') 1670 break; 1671 } 1672 } 1673 } 1674 mlen = sizeof(struct counters) * cnt; 1675 cnts = malloc(mlen); 1676 ncnts = cnt; 1677 if (cnts == NULL) { 1678 printf("No memory err:%d\n", errno); 1679 return; 1680 } 1681 memset(cnts, 0, mlen); 1682 for (i=0, cnt=0; i<len; i++) { 1683 if (strncmp(&buffer[i], "s/", 2) == 0) { 1684 p = &buffer[i]; 1685 for(;i<len;i++) { 1686 if (buffer[i] == ' ') { 1687 buffer[i] = 0; 1688 break; 1689 } 1690 } 1691 process_header(cnt, p); 1692 cnt++; 1693 } 1694 } 1695 if (verbose) 1696 printf("We have %d entries\n", cnt); 1697} 1698extern int max_to_collect; 1699int max_to_collect = MAX_COUNTER_SLOTS; 1700 1701static int 1702read_a_line(FILE *io) 1703{ 1704 char buffer[8192], *p, *stop; 1705 int pos, i; 1706 1707 if (fgets(buffer, sizeof(buffer), io) == NULL) { 1708 return(0); 1709 } 1710 p = buffer; 1711 for (i=0; i<ncnts; i++) { 1712 pos = cnts[i].pos; 1713 cnts[i].vals[pos] = strtol(p, &stop, 0); 1714 cnts[i].pos++; 1715 cnts[i].sum += cnts[i].vals[pos]; 1716 p = stop; 1717 } 1718 return (1); 1719} 1720 1721extern int cpu_count_out; 1722int cpu_count_out=0; 1723 1724static void 1725print_header(void) 1726{ 1727 int i, cnt, printed_cnt; 1728 1729 printf("*********************************\n"); 1730 for(i=0, cnt=0; i<MAX_CPU; i++) { 1731 if (glob_cpu[i]) { 1732 cnt++; 1733 } 1734 } 1735 cpu_count_out = cnt; 1736 for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 1737 if (glob_cpu[i]) { 1738 printf("CPU%d", i); 1739 printed_cnt++; 1740 } 1741 if (printed_cnt == cnt) { 1742 printf("\n"); 1743 break; 1744 } else { 1745 printf("\t"); 1746 } 1747 } 1748} 1749 1750static void 1751lace_cpus_together(void) 1752{ 1753 int i, j, lace_cpu; 1754 struct counters *cpat, *at; 1755 1756 for(i=0; i<ncnts; i++) { 1757 cpat = &cnts[i]; 1758 if (cpat->next_cpu) { 1759 /* Already laced in */ 1760 continue; 1761 } 1762 lace_cpu = cpat->cpu; 1763 if (lace_cpu >= MAX_CPU) { 1764 printf("CPU %d to big\n", lace_cpu); 1765 continue; 1766 } 1767 if (glob_cpu[lace_cpu] == NULL) { 1768 glob_cpu[lace_cpu] = cpat; 1769 } else { 1770 /* Already processed this cpu */ 1771 continue; 1772 } 1773 /* Ok look forward for cpu->cpu and link in */ 1774 for(j=(i+1); j<ncnts; j++) { 1775 at = &cnts[j]; 1776 if (at->next_cpu) { 1777 continue; 1778 } 1779 if (at->cpu == lace_cpu) { 1780 /* Found one */ 1781 cpat->next_cpu = at; 1782 cpat = at; 1783 } 1784 } 1785 } 1786} 1787 1788 1789static void 1790process_file(char *filename) 1791{ 1792 FILE *io; 1793 int i; 1794 int line_at, not_done; 1795 pid_t pid_of_command=0; 1796 1797 if (filename == NULL) { 1798 io = my_popen(command, "r", &pid_of_command); 1799 } else { 1800 io = fopen(filename, "r"); 1801 if (io == NULL) { 1802 printf("Can't process file %s err:%d\n", 1803 filename, errno); 1804 return; 1805 } 1806 } 1807 build_counters_from_header(io); 1808 if (cnts == NULL) { 1809 /* Nothing we can do */ 1810 printf("Nothing to do -- no counters built\n"); 1811 if (io) { 1812 fclose(io); 1813 } 1814 return; 1815 } 1816 lace_cpus_together(); 1817 print_header(); 1818 if (verbose) { 1819 for (i=0; i<ncnts; i++) { 1820 printf("Counter:%s cpu:%d index:%d\n", 1821 cnts[i].counter_name, 1822 cnts[i].cpu, i); 1823 } 1824 } 1825 line_at = 0; 1826 not_done = 1; 1827 while(not_done) { 1828 if (read_a_line(io)) { 1829 line_at++; 1830 } else { 1831 break; 1832 } 1833 if (line_at >= max_to_collect) { 1834 not_done = 0; 1835 } 1836 if (filename == NULL) { 1837 int cnt; 1838 /* For the ones we dynamically open we print now */ 1839 for(i=0, cnt=0; i<MAX_CPU; i++) { 1840 do_expression(glob_cpu[i], (line_at-1)); 1841 cnt++; 1842 if (cnt == cpu_count_out) { 1843 printf("\n"); 1844 break; 1845 } else { 1846 printf("\t"); 1847 } 1848 } 1849 } 1850 } 1851 if (filename) { 1852 fclose(io); 1853 } else { 1854 my_pclose(io, pid_of_command); 1855 } 1856} 1857#if defined(__amd64__) 1858#define cpuid(in,a,b,c,d)\ 1859 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 1860#else 1861#define cpuid(in, a, b, c, d) 1862#endif 1863 1864static void 1865get_cpuid_set(void) 1866{ 1867 unsigned long eax, ebx, ecx, edx; 1868 int model; 1869 pid_t pid_of_command=0; 1870 size_t sz, len; 1871 FILE *io; 1872 char linebuf[1024], *str; 1873 1874 eax = ebx = ecx = edx = 0; 1875 1876 cpuid(0, eax, ebx, ecx, edx); 1877 if (ebx == 0x68747541) { 1878 printf("AMD processors are not supported by this program\n"); 1879 printf("Sorry\n"); 1880 exit(0); 1881 } else if (ebx == 0x6972794) { 1882 printf("Cyrix processors are not supported by this program\n"); 1883 printf("Sorry\n"); 1884 exit(0); 1885 } else if (ebx == 0x756e6547) { 1886 printf("Genuine Intel\n"); 1887 } else { 1888 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 1889 exit(0); 1890 } 1891 cpuid(1, eax, ebx, ecx, edx); 1892 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 1893 printf("CPU model is 0x%x id:0x%lx\n", model, eax); 1894 switch (eax & 0xF00) { 1895 case 0x500: /* Pentium family processors */ 1896 printf("Intel Pentium P5\n"); 1897 goto not_supported; 1898 break; 1899 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 1900 switch (model) { 1901 case 0x1: 1902 printf("Intel Pentium P6\n"); 1903 goto not_supported; 1904 break; 1905 case 0x3: 1906 case 0x5: 1907 printf("Intel PII\n"); 1908 goto not_supported; 1909 break; 1910 case 0x6: case 0x16: 1911 printf("Intel CL\n"); 1912 goto not_supported; 1913 break; 1914 case 0x7: case 0x8: case 0xA: case 0xB: 1915 printf("Intel PIII\n"); 1916 goto not_supported; 1917 break; 1918 case 0x9: case 0xD: 1919 printf("Intel PM\n"); 1920 goto not_supported; 1921 break; 1922 case 0xE: 1923 printf("Intel CORE\n"); 1924 goto not_supported; 1925 break; 1926 case 0xF: 1927 printf("Intel CORE2\n"); 1928 goto not_supported; 1929 break; 1930 case 0x17: 1931 printf("Intel CORE2EXTREME\n"); 1932 goto not_supported; 1933 break; 1934 case 0x1C: /* Per Intel document 320047-002. */ 1935 printf("Intel ATOM\n"); 1936 goto not_supported; 1937 break; 1938 case 0x1A: 1939 case 0x1E: /* 1940 * Per Intel document 253669-032 9/2009, 1941 * pages A-2 and A-57 1942 */ 1943 case 0x1F: /* 1944 * Per Intel document 253669-032 9/2009, 1945 * pages A-2 and A-57 1946 */ 1947 printf("Intel COREI7\n"); 1948 goto not_supported; 1949 break; 1950 case 0x2E: 1951 printf("Intel NEHALEM\n"); 1952 goto not_supported; 1953 break; 1954 case 0x25: /* Per Intel document 253669-033US 12/2009. */ 1955 case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 1956 printf("Intel WESTMERE\n"); 1957 goto not_supported; 1958 break; 1959 case 0x2F: /* Westmere-EX, seen in wild */ 1960 printf("Intel WESTMERE\n"); 1961 goto not_supported; 1962 break; 1963 case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 1964 printf("Intel SANDYBRIDGE\n"); 1965 set_sandybridge(); 1966 break; 1967 case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 1968 printf("Intel SANDYBRIDGE_XEON\n"); 1969 set_sandybridge(); 1970 break; 1971 case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 1972 printf("Intel IVYBRIDGE\n"); 1973 set_ivybridge(); 1974 break; 1975 case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 1976 printf("Intel IVYBRIDGE_XEON\n"); 1977 set_ivybridge(); 1978 break; 1979 case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 1980 printf("Intel HASWELL (Xeon)\n"); 1981 set_haswell(); 1982 break; 1983 case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 1984 case 0x45: 1985 case 0x46: 1986 printf("Intel HASWELL\n"); 1987 set_haswell(); 1988 break; 1989 case 0x4D: 1990 /* Per Intel document 330061-001 01/2014. */ 1991 printf("Intel ATOM_SILVERMONT\n"); 1992 goto not_supported; 1993 break; 1994 default: 1995 printf("Intel model 0x%x is not known -- sorry\n", 1996 model); 1997 goto not_supported; 1998 break; 1999 } 2000 break; 2001 case 0xF00: /* P4 */ 2002 printf("Intel unknown model %d\n", model); 2003 goto not_supported; 2004 break; 2005 } 2006 /* Ok lets load the list of all known PMC's */ 2007 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2008 if (valid_pmcs == NULL) { 2009 /* Likely */ 2010 pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2011 sz = sizeof(char *) * pmc_allocated_cnt; 2012 valid_pmcs = malloc(sz); 2013 if (valid_pmcs == NULL) { 2014 printf("No memory allocation fails at startup?\n"); 2015 exit(-1); 2016 } 2017 memset(valid_pmcs, 0, sz); 2018 } 2019 2020 while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2021 if (linebuf[0] != '\t') { 2022 /* sometimes headers ;-) */ 2023 continue; 2024 } 2025 len = strlen(linebuf); 2026 if (linebuf[(len-1)] == '\n') { 2027 /* Likely */ 2028 linebuf[(len-1)] = 0; 2029 } 2030 str = &linebuf[1]; 2031 len = strlen(str) + 1; 2032 valid_pmcs[valid_pmc_cnt] = malloc(len); 2033 if (valid_pmcs[valid_pmc_cnt] == NULL) { 2034 printf("No memory2 allocation fails at startup?\n"); 2035 exit(-1); 2036 } 2037 memset(valid_pmcs[valid_pmc_cnt], 0, len); 2038 strcpy(valid_pmcs[valid_pmc_cnt], str); 2039 valid_pmc_cnt++; 2040 if (valid_pmc_cnt >= pmc_allocated_cnt) { 2041 /* Got to expand -- unlikely */ 2042 char **more; 2043 2044 sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2045 more = malloc(sz); 2046 if (more == NULL) { 2047 printf("No memory3 allocation fails at startup?\n"); 2048 exit(-1); 2049 } 2050 memset(more, 0, sz); 2051 memcpy(more, valid_pmcs, sz); 2052 pmc_allocated_cnt *= 2; 2053 free(valid_pmcs); 2054 valid_pmcs = more; 2055 } 2056 } 2057 my_pclose(io, pid_of_command); 2058 return; 2059not_supported: 2060 printf("Not supported\n"); 2061 exit(-1); 2062} 2063 2064static void 2065explain_all(void) 2066{ 2067 int i; 2068 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2069 printf("-------------------------------------------------------------\n"); 2070 for(i=0; i<the_cpu.number; i++){ 2071 printf("For -e %s ", the_cpu.ents[i].name); 2072 (*the_cpu.explain)(the_cpu.ents[i].name); 2073 printf("----------------------------\n"); 2074 } 2075} 2076 2077static void 2078test_for_a_pmc(const char *pmc, int out_so_far) 2079{ 2080 FILE *io; 2081 pid_t pid_of_command=0; 2082 char my_command[1024]; 2083 char line[1024]; 2084 char resp[1024]; 2085 int len, llen, i; 2086 2087 if (out_so_far < 50) { 2088 len = 50 - out_so_far; 2089 for(i=0; i<len; i++) { 2090 printf(" "); 2091 } 2092 } 2093 sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2094 io = my_popen(my_command, "r", &pid_of_command); 2095 if (io == NULL) { 2096 printf("Failed -- popen fails\n"); 2097 return; 2098 } 2099 /* Setup what we expect */ 2100 len = sprintf(resp, "%s", pmc); 2101 if (fgets(line, sizeof(line), io) == NULL) { 2102 printf("Failed -- no output from pmstat\n"); 2103 goto out; 2104 } 2105 llen = strlen(line); 2106 if (line[(llen-1)] == '\n') { 2107 line[(llen-1)] = 0; 2108 llen--; 2109 } 2110 for(i=2; i<(llen-len); i++) { 2111 if (strncmp(&line[i], "ERROR", 5) == 0) { 2112 printf("Failed %s\n", line); 2113 goto out; 2114 } else if (strncmp(&line[i], resp, len) == 0) { 2115 int j, k; 2116 2117 if (fgets(line, sizeof(line), io) == NULL) { 2118 printf("Failed -- no second output from pmstat\n"); 2119 goto out; 2120 } 2121 len = strlen(line); 2122 for (j=0; j<len; j++) { 2123 if (line[j] == ' ') { 2124 j++; 2125 } else { 2126 break; 2127 } 2128 } 2129 printf("Pass"); 2130 len = strlen(&line[j]); 2131 if (len < 20) { 2132 for(k=0; k<(20-len); k++) { 2133 printf(" "); 2134 } 2135 } 2136 if (len) { 2137 printf("%s", &line[j]); 2138 } else { 2139 printf("\n"); 2140 } 2141 goto out; 2142 } 2143 } 2144 printf("Failed -- '%s' not '%s'\n", line, resp); 2145out: 2146 my_pclose(io, pid_of_command); 2147 2148} 2149 2150static int 2151add_it_to(char **vars, int cur_cnt, char *name) 2152{ 2153 int i; 2154 size_t len; 2155 for(i=0; i<cur_cnt; i++) { 2156 if (strcmp(vars[i], name) == 0) { 2157 /* Already have */ 2158 return(0); 2159 } 2160 } 2161 if (vars[cur_cnt] != NULL) { 2162 printf("Cur_cnt:%d filled with %s??\n", 2163 cur_cnt, vars[cur_cnt]); 2164 exit(-1); 2165 } 2166 /* Ok its new */ 2167 len = strlen(name) + 1; 2168 vars[cur_cnt] = malloc(len); 2169 if (vars[cur_cnt] == NULL) { 2170 printf("No memory %s\n", __FUNCTION__); 2171 exit(-1); 2172 } 2173 memset(vars[cur_cnt], 0, len); 2174 strcpy(vars[cur_cnt], name); 2175 return(1); 2176} 2177 2178static char * 2179build_command_for_exp(struct expression *exp) 2180{ 2181 /* 2182 * Build the pmcstat command to handle 2183 * the passed in expression. 2184 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2185 * where NNN and QQQ represent the PMC's in the expression 2186 * uniquely.. 2187 */ 2188 char forming[1024]; 2189 int cnt_pmc, alloced_pmcs, i; 2190 struct expression *at; 2191 char **vars, *cmd; 2192 size_t mal; 2193 2194 alloced_pmcs = cnt_pmc = 0; 2195 /* first how many do we have */ 2196 at = exp; 2197 while (at) { 2198 if (at->type == TYPE_VALUE_PMC) { 2199 cnt_pmc++; 2200 } 2201 at = at->next; 2202 } 2203 if (cnt_pmc == 0) { 2204 printf("No PMC's in your expression -- nothing to do!!\n"); 2205 exit(0); 2206 } 2207 mal = cnt_pmc * sizeof(char *); 2208 vars = malloc(mal); 2209 if (vars == NULL) { 2210 printf("No memory\n"); 2211 exit(-1); 2212 } 2213 memset(vars, 0, mal); 2214 at = exp; 2215 while (at) { 2216 if (at->type == TYPE_VALUE_PMC) { 2217 if(add_it_to(vars, alloced_pmcs, at->name)) { 2218 alloced_pmcs++; 2219 } 2220 } 2221 at = at->next; 2222 } 2223 /* Now we have a unique list in vars so create our command */ 2224 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2225 for(i=0; i<alloced_pmcs; i++) { 2226 mal += strlen(vars[i]) + 4; /* var + " -s " */ 2227 } 2228 cmd = malloc((mal+2)); 2229 if (cmd == NULL) { 2230 printf("%s out of mem\n", __FUNCTION__); 2231 exit(-1); 2232 } 2233 memset(cmd, 0, (mal+2)); 2234 strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2235 at = exp; 2236 for(i=0; i<alloced_pmcs; i++) { 2237 sprintf(forming, " -s %s", vars[i]); 2238 strcat(cmd, forming); 2239 free(vars[i]); 2240 vars[i] = NULL; 2241 } 2242 free(vars); 2243 return(cmd); 2244} 2245 2246static int 2247user_expr(struct counters *cpu, int pos) 2248{ 2249 int ret; 2250 double res; 2251 struct counters *var; 2252 struct expression *at; 2253 2254 at = master_exp; 2255 while (at) { 2256 if (at->type == TYPE_VALUE_PMC) { 2257 var = find_counter(cpu, at->name); 2258 if (var == NULL) { 2259 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2260 exit(-1); 2261 } 2262 if (pos != -1) { 2263 at->value = var->vals[pos] * 1.0; 2264 } else { 2265 at->value = var->sum * 1.0; 2266 } 2267 } 2268 at = at->next; 2269 } 2270 res = run_expr(master_exp, 1, NULL); 2271 ret = printf("%1.3f", res); 2272 return(ret); 2273} 2274 2275 2276static void 2277set_manual_exp(struct expression *exp) 2278{ 2279 expression = user_expr; 2280 command = build_command_for_exp(exp); 2281 threshold = "User defined threshold"; 2282} 2283 2284static void 2285run_tests(void) 2286{ 2287 int i, lenout; 2288 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2289 printf("------------------------------------------------------------------------\n"); 2290 for(i=0; i<valid_pmc_cnt; i++) { 2291 lenout = printf("%s", valid_pmcs[i]); 2292 fflush(stdout); 2293 test_for_a_pmc(valid_pmcs[i], lenout); 2294 } 2295} 2296static void 2297list_all(void) 2298{ 2299 int i, cnt, j; 2300 printf("PMC Abbreviation\n"); 2301 printf("--------------------------------------------------------------\n"); 2302 for(i=0; i<valid_pmc_cnt; i++) { 2303 cnt = printf("%s", valid_pmcs[i]); 2304 for(j=cnt; j<52; j++) { 2305 printf(" "); 2306 } 2307 printf("%%%d\n", i); 2308 } 2309} 2310 2311 2312int 2313main(int argc, char **argv) 2314{ 2315 int i, j, cnt; 2316 char *filename=NULL; 2317 char *name=NULL; 2318 int help_only = 0; 2319 int test_mode = 0; 2320 2321 get_cpuid_set(); 2322 memset(glob_cpu, 0, sizeof(glob_cpu)); 2323 while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) { 2324 switch (i) { 2325 case 'L': 2326 list_all(); 2327 return(0); 2328 case 'H': 2329 printf("**********************************\n"); 2330 explain_all(); 2331 printf("**********************************\n"); 2332 return(0); 2333 break; 2334 case 'T': 2335 test_mode = 1; 2336 break; 2337 case 'E': 2338 master_exp = parse_expression(optarg); 2339 if (master_exp) { 2340 set_manual_exp(master_exp); 2341 } 2342 break; 2343 case 'e': 2344 if (validate_expression(optarg)) { 2345 printf("Unknown expression %s\n", optarg); 2346 return(0); 2347 } 2348 name = optarg; 2349 set_expression(optarg); 2350 break; 2351 case 'm': 2352 max_to_collect = strtol(optarg, NULL, 0); 2353 if (max_to_collect > MAX_COUNTER_SLOTS) { 2354 /* You can't collect more than max in array */ 2355 max_to_collect = MAX_COUNTER_SLOTS; 2356 } 2357 break; 2358 case 'v': 2359 verbose++; 2360 break; 2361 case 'h': 2362 help_only = 1; 2363 break; 2364 case 'i': 2365 filename = optarg; 2366 break; 2367 case '?': 2368 default: 2369 use: 2370 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2371 argv[0]); 2372 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2373 printf("-v -- verbose dump debug type things -- you don't want this\n"); 2374 printf("-m N -- maximum to collect is N measurments\n"); 2375 printf("-e expr-name -- Do expression expr-name\n"); 2376 printf("-E 'your expression' -- Do your expression\n"); 2377 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2378 printf("-H -- Don't run anything, just explain all canned expressions\n"); 2379 printf("-T -- Test all PMC's defined by this processor\n"); 2380 return(0); 2381 break; 2382 }; 2383 } 2384 if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) { 2385 printf("Without setting an expression we cannot dynamically gather information\n"); 2386 printf("you must supply a filename (and you probably want verbosity)\n"); 2387 goto use; 2388 } 2389 if (test_mode) { 2390 run_tests(); 2391 return(0); 2392 } 2393 printf("*********************************\n"); 2394 if (master_exp == NULL) { 2395 (*the_cpu.explain)(name); 2396 } else { 2397 printf("Examine your expression "); 2398 print_exp(master_exp); 2399 printf("User defined threshold\n"); 2400 } 2401 if (help_only) { 2402 return(0); 2403 } 2404 process_file(filename); 2405 if (verbose >= 2) { 2406 for (i=0; i<ncnts; i++) { 2407 printf("Counter:%s cpu:%d index:%d\n", 2408 cnts[i].counter_name, 2409 cnts[i].cpu, i); 2410 for(j=0; j<cnts[i].pos; j++) { 2411 printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2412 } 2413 printf(" sum - %ld\n", (long int)cnts[i].sum); 2414 } 2415 } 2416 if (expression == NULL) { 2417 return(0); 2418 } 2419 for(i=0, cnt=0; i<MAX_CPU; i++) { 2420 if (glob_cpu[i]) { 2421 do_expression(glob_cpu[i], -1); 2422 cnt++; 2423 if (cnt == cpu_count_out) { 2424 printf("\n"); 2425 break; 2426 } else { 2427 printf("\t"); 2428 } 2429 } 2430 } 2431 return(0); 2432} 2433