1277177Srrs/*- 2277177Srrs * Copyright (c) 2014, 2015 Netflix Inc. 3277177Srrs * All rights reserved. 4277177Srrs * 5277177Srrs * Redistribution and use in source and binary forms, with or without 6277177Srrs * modification, are permitted provided that the following conditions 7277177Srrs * are met: 8277177Srrs * 1. Redistributions of source code must retain the above copyright 9277177Srrs * notice, this list of conditions and the following disclaimer, 10277177Srrs * in this position and unchanged. 11277177Srrs * 2. Redistributions in binary form must reproduce the above copyright 12277177Srrs * notice, this list of conditions and the following disclaimer in the 13277177Srrs * documentation and/or other materials provided with the distribution. 14277177Srrs * 3. The name of the author may not be used to endorse or promote products 15277177Srrs * derived from this software without specific prior written permission 16277177Srrs * 17277177Srrs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18277177Srrs * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19277177Srrs * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20277177Srrs * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21277177Srrs * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22277177Srrs * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23277177Srrs * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24277177Srrs * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25277177Srrs * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26277177Srrs * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27277177Srrs */ 28277177Srrs#include <sys/types.h> 29277177Srrs#include <stdio.h> 30277177Srrs#include <stdlib.h> 31277177Srrs#include <unistd.h> 32277177Srrs#include <string.h> 33277177Srrs#include <strings.h> 34277177Srrs#include <sys/errno.h> 35277177Srrs#include <signal.h> 36277177Srrs#include <sys/wait.h> 37277177Srrs#include <getopt.h> 38277177Srrs#include "eval_expr.h" 39277177Srrs__FBSDID("$FreeBSD: releng/10.2/usr.sbin/pmcstudy/pmcstudy.c 285853 2015-07-24 19:37:30Z emaste $"); 40277177Srrs 41277177Srrs#define MAX_COUNTER_SLOTS 1024 42277177Srrs#define MAX_NLEN 64 43277177Srrs#define MAX_CPU 64 44277177Srrsstatic int verbose = 0; 45277177Srrs 46277177Srrsextern char **environ; 47277177Srrsextern struct expression *master_exp; 48277177Srrsstruct expression *master_exp=NULL; 49277177Srrs 50277177Srrs#define PMC_INITIAL_ALLOC 512 51277177Srrsextern char **valid_pmcs; 52277177Srrschar **valid_pmcs = NULL; 53277177Srrsextern int valid_pmc_cnt; 54277177Srrsint valid_pmc_cnt=0; 55277177Srrsextern int pmc_allocated_cnt; 56277177Srrsint pmc_allocated_cnt=0; 57277177Srrs 58277177Srrs/* 59277177Srrs * The following two varients on popen and pclose with 60277177Srrs * the cavet that they get you the PID so that you 61277177Srrs * can supply it to pclose so it can send a SIGTERM 62277177Srrs * to the process. 63277177Srrs */ 64277177Srrsstatic FILE * 65277177Srrsmy_popen(const char *command, const char *dir, pid_t *p_pid) 66277177Srrs{ 67277177Srrs FILE *io_out, *io_in; 68277177Srrs int pdesin[2], pdesout[2]; 69277177Srrs char *argv[4]; 70277177Srrs pid_t pid; 71277177Srrs char cmd[4]; 72277177Srrs char cmd2[1024]; 73277177Srrs char arg1[4]; 74277177Srrs 75277177Srrs if ((strcmp(dir, "r") != 0) && 76277177Srrs (strcmp(dir, "w") != 0)) { 77277177Srrs errno = EINVAL; 78277177Srrs return(NULL); 79277177Srrs } 80277177Srrs if (pipe(pdesin) < 0) 81277177Srrs return (NULL); 82277177Srrs 83277177Srrs if (pipe(pdesout) < 0) { 84277177Srrs (void)close(pdesin[0]); 85277177Srrs (void)close(pdesin[1]); 86277177Srrs return (NULL); 87277177Srrs } 88277177Srrs strcpy(cmd, "sh"); 89277177Srrs strcpy(arg1, "-c"); 90277177Srrs strcpy(cmd2, command); 91277177Srrs argv[0] = cmd; 92277177Srrs argv[1] = arg1; 93277177Srrs argv[2] = cmd2; 94277177Srrs argv[3] = NULL; 95277177Srrs 96277177Srrs switch (pid = fork()) { 97277177Srrs case -1: /* Error. */ 98277177Srrs (void)close(pdesin[0]); 99277177Srrs (void)close(pdesin[1]); 100277177Srrs (void)close(pdesout[0]); 101277177Srrs (void)close(pdesout[1]); 102277177Srrs return (NULL); 103277177Srrs /* NOTREACHED */ 104277177Srrs case 0: /* Child. */ 105277177Srrs /* Close out un-used sides */ 106277177Srrs (void)close(pdesin[1]); 107277177Srrs (void)close(pdesout[0]); 108277177Srrs /* Now prepare the stdin of the process */ 109277177Srrs close(0); 110277177Srrs (void)dup(pdesin[0]); 111277177Srrs (void)close(pdesin[0]); 112277177Srrs /* Now prepare the stdout of the process */ 113277177Srrs close(1); 114277177Srrs (void)dup(pdesout[1]); 115277177Srrs /* And lets do stderr just in case */ 116277177Srrs close(2); 117277177Srrs (void)dup(pdesout[1]); 118277177Srrs (void)close(pdesout[1]); 119277177Srrs /* Now run it */ 120277177Srrs execve("/bin/sh", argv, environ); 121277177Srrs exit(127); 122277177Srrs /* NOTREACHED */ 123277177Srrs } 124277177Srrs /* Parent; assume fdopen can't fail. */ 125277177Srrs /* Store the pid */ 126277177Srrs *p_pid = pid; 127277177Srrs if (strcmp(dir, "r") != 0) { 128277177Srrs io_out = fdopen(pdesin[1], "w"); 129277177Srrs (void)close(pdesin[0]); 130277177Srrs (void)close(pdesout[0]); 131277177Srrs (void)close(pdesout[1]); 132277177Srrs return(io_out); 133277177Srrs } else { 134277177Srrs /* Prepare the input stream */ 135277177Srrs io_in = fdopen(pdesout[0], "r"); 136277177Srrs (void)close(pdesout[1]); 137277177Srrs (void)close(pdesin[0]); 138277177Srrs (void)close(pdesin[1]); 139277177Srrs return (io_in); 140277177Srrs } 141277177Srrs} 142277177Srrs 143277177Srrs/* 144277177Srrs * pclose -- 145277177Srrs * Pclose returns -1 if stream is not associated with a `popened' command, 146277177Srrs * if already `pclosed', or waitpid returns an error. 147277177Srrs */ 148277177Srrsstatic void 149277177Srrsmy_pclose(FILE *io, pid_t the_pid) 150277177Srrs{ 151277177Srrs int pstat; 152277177Srrs pid_t pid; 153277177Srrs 154277177Srrs /* 155277177Srrs * Find the appropriate file pointer and remove it from the list. 156277177Srrs */ 157277177Srrs (void)fclose(io); 158277177Srrs /* Die if you are not dead! */ 159277177Srrs kill(the_pid, SIGTERM); 160277177Srrs do { 161277177Srrs pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 162277177Srrs } while (pid == -1 && errno == EINTR); 163277177Srrs} 164277177Srrs 165277177Srrsstruct counters { 166277177Srrs struct counters *next_cpu; 167277177Srrs char counter_name[MAX_NLEN]; /* Name of counter */ 168277177Srrs int cpu; /* CPU we are on */ 169277177Srrs int pos; /* Index we are filling to. */ 170277177Srrs uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 171277177Srrs uint64_t sum; /* Summary of entries */ 172277177Srrs}; 173277177Srrs 174277177Srrsextern struct counters *glob_cpu[MAX_CPU]; 175277177Srrsstruct counters *glob_cpu[MAX_CPU]; 176277177Srrs 177277177Srrsextern struct counters *cnts; 178277177Srrsstruct counters *cnts=NULL; 179277177Srrs 180277177Srrsextern int ncnts; 181277177Srrsint ncnts=0; 182277177Srrs 183277177Srrsextern int (*expression)(struct counters *, int); 184277177Srrsint (*expression)(struct counters *, int); 185277177Srrs 186277177Srrsstatic const char *threshold=NULL; 187277177Srrsstatic const char *command; 188277177Srrs 189277177Srrsstruct cpu_entry { 190277177Srrs const char *name; 191277177Srrs const char *thresh; 192277177Srrs const char *command; 193277177Srrs int (*func)(struct counters *, int); 194277177Srrs}; 195277177Srrs 196277177Srrs 197277177Srrsstruct cpu_type { 198277177Srrs char cputype[32]; 199277177Srrs int number; 200277177Srrs struct cpu_entry *ents; 201277177Srrs void (*explain)(const char *name); 202277177Srrs}; 203277177Srrsextern struct cpu_type the_cpu; 204277177Srrsstruct cpu_type the_cpu; 205277177Srrs 206277177Srrsstatic void 207277177Srrsexplain_name_sb(const char *name) 208277177Srrs{ 209277177Srrs const char *mythresh; 210277177Srrs if (strcmp(name, "allocstall1") == 0) { 211277177Srrs printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 212277177Srrs mythresh = "thresh > .05"; 213277177Srrs } else if (strcmp(name, "allocstall2") == 0) { 214277177Srrs printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 215277177Srrs mythresh = "thresh > .05"; 216277177Srrs } else if (strcmp(name, "br_miss") == 0) { 217277177Srrs printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 218277177Srrs mythresh = "thresh >= .2"; 219277177Srrs } else if (strcmp(name, "splitload") == 0) { 220277177Srrs printf("Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 221277177Srrs mythresh = "thresh >= .1"; 222277177Srrs } else if (strcmp(name, "splitstore") == 0) { 223277177Srrs printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 224277177Srrs mythresh = "thresh >= .01"; 225277177Srrs } else if (strcmp(name, "contested") == 0) { 226277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 227277177Srrs mythresh = "thresh >= .05"; 228277177Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 229277177Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 230277177Srrs mythresh = "thresh >= .05"; 231277177Srrs } else if (strcmp(name, "cache2") == 0) { 232277177Srrs printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 233277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 234277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 235277177Srrs printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 236277177Srrs mythresh = "thresh >= .2"; 237277177Srrs } else if (strcmp(name, "cache1") == 0) { 238277177Srrs printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 239277177Srrs mythresh = "thresh >= .2"; 240277177Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 241277177Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 242277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 243277177Srrs mythresh = "thresh >= .1"; 244277177Srrs } else if (strcmp(name, "frontendstall") == 0) { 245277177Srrs printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 246277177Srrs mythresh = "thresh >= .15"; 247277177Srrs } else if (strcmp(name, "clears") == 0) { 248277177Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 249277177Srrs printf(" MACHINE_CLEARS.SMC + \n"); 250277177Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 251277177Srrs mythresh = "thresh >= .02"; 252277177Srrs } else if (strcmp(name, "microassist") == 0) { 253277177Srrs printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 254277177Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 255277177Srrs mythresh = "thresh >= .05"; 256277177Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 257277177Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 258277177Srrs mythresh = "thresh >= .1"; 259277177Srrs } else if (strcmp(name, "fpassist") == 0) { 260277177Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 261277177Srrs mythresh = "look for a excessive value"; 262277177Srrs } else if (strcmp(name, "otherassistavx") == 0) { 263277177Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 264277177Srrs mythresh = "look for a excessive value"; 265277177Srrs } else if (strcmp(name, "otherassistsse") == 0) { 266277177Srrs printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267277177Srrs mythresh = "look for a excessive value"; 268277177Srrs } else if (strcmp(name, "eff1") == 0) { 269277177Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 270277177Srrs mythresh = "thresh < .9"; 271277177Srrs } else if (strcmp(name, "eff2") == 0) { 272277177Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 273277177Srrs mythresh = "thresh > 1.0"; 274277177Srrs } else if (strcmp(name, "dtlbmissstore") == 0) { 275277177Srrs printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 276277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 277277177Srrs mythresh = "thresh >= .05"; 278277177Srrs } else { 279277177Srrs printf("Unknown name:%s\n", name); 280277177Srrs mythresh = "unknown entry"; 281277177Srrs } 282277177Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 283277177Srrs} 284277177Srrs 285277177Srrsstatic void 286277177Srrsexplain_name_ib(const char *name) 287277177Srrs{ 288277177Srrs const char *mythresh; 289277177Srrs if (strcmp(name, "br_miss") == 0) { 290277177Srrs printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 291277177Srrs printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 292277177Srrs printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 293277177Srrs mythresh = "thresh >= .2"; 294277177Srrs } else if (strcmp(name, "eff1") == 0) { 295277177Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 296277177Srrs mythresh = "thresh < .9"; 297277177Srrs } else if (strcmp(name, "eff2") == 0) { 298277177Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 299277177Srrs mythresh = "thresh > 1.0"; 300277177Srrs } else if (strcmp(name, "cache1") == 0) { 301277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 302277177Srrs mythresh = "thresh >= .2"; 303277177Srrs } else if (strcmp(name, "cache2") == 0) { 304277177Srrs printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 305277177Srrs mythresh = "thresh >= .2"; 306277177Srrs } else if (strcmp(name, "itlbmiss") == 0) { 307277177Srrs printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 308277177Srrs mythresh = "thresh > .05"; 309277177Srrs } else if (strcmp(name, "icachemiss") == 0) { 310277177Srrs printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 311277177Srrs mythresh = "thresh > .05"; 312277177Srrs } else if (strcmp(name, "lcpstall") == 0) { 313277177Srrs printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 314277177Srrs mythresh = "thresh > .05"; 315277177Srrs } else if (strcmp(name, "datashare") == 0) { 316277177Srrs printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 317277177Srrs mythresh = "thresh > .05"; 318277177Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 319277177Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 320277177Srrs mythresh = "thresh >= .05"; 321277177Srrs } else if (strcmp(name, "splitload") == 0) { 322277177Srrs printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 323277177Srrs printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 324277177Srrs mythresh = "thresh >= .1"; 325277177Srrs } else if (strcmp(name, "splitstore") == 0) { 326277177Srrs printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 327277177Srrs mythresh = "thresh >= .01"; 328277177Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 329277177Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 330277177Srrs mythresh = "thresh >= .1"; 331277177Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 332277177Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 333277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 334277177Srrs mythresh = "thresh >= .1"; 335277177Srrs } else if (strcmp(name, "dtlbmissstore") == 0) { 336277177Srrs printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 337277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 338277177Srrs mythresh = "thresh >= .05"; 339277177Srrs } else if (strcmp(name, "contested") == 0) { 340277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 341277177Srrs mythresh = "thresh >= .05"; 342277177Srrs } else if (strcmp(name, "clears") == 0) { 343277177Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 344277177Srrs printf(" MACHINE_CLEARS.SMC + \n"); 345277177Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 346277177Srrs mythresh = "thresh >= .02"; 347277177Srrs } else if (strcmp(name, "microassist") == 0) { 348277177Srrs printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 349277177Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 350277177Srrs mythresh = "thresh >= .05"; 351277177Srrs } else if (strcmp(name, "fpassist") == 0) { 352277177Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 353277177Srrs mythresh = "look for a excessive value"; 354277177Srrs } else if (strcmp(name, "otherassistavx") == 0) { 355277177Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 356277177Srrs mythresh = "look for a excessive value"; 357277177Srrs } else if (strcmp(name, "otherassistsse") == 0) { 358277177Srrs printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359277177Srrs mythresh = "look for a excessive value"; 360277177Srrs } else { 361277177Srrs printf("Unknown name:%s\n", name); 362277177Srrs mythresh = "unknown entry"; 363277177Srrs } 364277177Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 365277177Srrs} 366277177Srrs 367277177Srrs 368277177Srrsstatic void 369277177Srrsexplain_name_has(const char *name) 370277177Srrs{ 371277177Srrs const char *mythresh; 372277177Srrs if (strcmp(name, "eff1") == 0) { 373277177Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 374277177Srrs mythresh = "thresh < .75"; 375277177Srrs } else if (strcmp(name, "eff2") == 0) { 376277177Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 377277177Srrs mythresh = "thresh > 1.0"; 378277177Srrs } else if (strcmp(name, "itlbmiss") == 0) { 379277177Srrs printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 380277177Srrs mythresh = "thresh > .05"; 381277177Srrs } else if (strcmp(name, "icachemiss") == 0) { 382277177Srrs printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 383277177Srrs mythresh = "thresh > .05"; 384277177Srrs } else if (strcmp(name, "lcpstall") == 0) { 385277177Srrs printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 386277177Srrs mythresh = "thresh > .05"; 387277177Srrs } else if (strcmp(name, "cache1") == 0) { 388277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 389277177Srrs mythresh = "thresh >= .2"; 390277177Srrs } else if (strcmp(name, "cache2") == 0) { 391277177Srrs printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 392277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 393277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 394277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 395277177Srrs mythresh = "thresh >= .2"; 396277177Srrs } else if (strcmp(name, "contested") == 0) { 397277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 398277177Srrs mythresh = "thresh >= .05"; 399277177Srrs } else if (strcmp(name, "datashare") == 0) { 400277177Srrs printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 401277177Srrs mythresh = "thresh > .05"; 402277177Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 403277177Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 404277177Srrs mythresh = "thresh >= .05"; 405277177Srrs } else if (strcmp(name, "splitload") == 0) { 406277177Srrs printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 407277177Srrs mythresh = "thresh >= .1"; 408277177Srrs } else if (strcmp(name, "splitstore") == 0) { 409277177Srrs printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); 410277177Srrs mythresh = "thresh >= .01"; 411277177Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 412277177Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 413277177Srrs mythresh = "thresh >= .1"; 414277177Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 415277177Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 416277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 417277177Srrs mythresh = "thresh >= .1"; 418277177Srrs } else if (strcmp(name, "br_miss") == 0) { 419277177Srrs printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 420277177Srrs mythresh = "thresh >= .2"; 421277177Srrs } else if (strcmp(name, "clears") == 0) { 422277177Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 423277177Srrs printf(" MACHINE_CLEARS.SMC + \n"); 424277177Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 425277177Srrs mythresh = "thresh >= .02"; 426277177Srrs } else if (strcmp(name, "microassist") == 0) { 427277177Srrs printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 428277177Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 429277177Srrs mythresh = "thresh >= .05"; 430277177Srrs } else if (strcmp(name, "fpassist") == 0) { 431277177Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 432277177Srrs mythresh = "look for a excessive value"; 433277177Srrs } else if (strcmp(name, "otherassistavx") == 0) { 434277177Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 435277177Srrs mythresh = "look for a excessive value"; 436277177Srrs } else if (strcmp(name, "otherassistsse") == 0) { 437277177Srrs printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438277177Srrs mythresh = "look for a excessive value"; 439277177Srrs } else { 440277177Srrs printf("Unknown name:%s\n", name); 441277177Srrs mythresh = "unknown entry"; 442277177Srrs } 443277177Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 444277177Srrs} 445277177Srrs 446277177Srrs 447277177Srrsstatic struct counters * 448277177Srrsfind_counter(struct counters *base, const char *name) 449277177Srrs{ 450277177Srrs struct counters *at; 451277177Srrs int len; 452277177Srrs 453277177Srrs at = base; 454277177Srrs len = strlen(name); 455277177Srrs while(at) { 456277177Srrs if (strncmp(at->counter_name, name, len) == 0) { 457277177Srrs return(at); 458277177Srrs } 459277177Srrs at = at->next_cpu; 460277177Srrs } 461277177Srrs printf("Can't find counter %s\n", name); 462277177Srrs printf("We have:\n"); 463277177Srrs at = base; 464277177Srrs while(at) { 465277177Srrs printf("- %s\n", at->counter_name); 466277177Srrs at = at->next_cpu; 467277177Srrs } 468277177Srrs exit(-1); 469277177Srrs} 470277177Srrs 471277177Srrsstatic int 472277177Srrsallocstall1(struct counters *cpu, int pos) 473277177Srrs{ 474277177Srrs/* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 475277177Srrs int ret; 476277177Srrs struct counters *partial; 477277177Srrs struct counters *unhalt; 478277177Srrs double un, par, res; 479277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 480277177Srrs partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 481277177Srrs if (pos != -1) { 482277177Srrs par = partial->vals[pos] * 1.0; 483277177Srrs un = unhalt->vals[pos] * 1.0; 484277177Srrs } else { 485277177Srrs par = partial->sum * 1.0; 486277177Srrs un = unhalt->sum * 1.0; 487277177Srrs } 488277177Srrs res = par/un; 489277177Srrs ret = printf("%1.3f", res); 490277177Srrs return(ret); 491277177Srrs} 492277177Srrs 493277177Srrsstatic int 494277177Srrsallocstall2(struct counters *cpu, int pos) 495277177Srrs{ 496277177Srrs/* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 497277177Srrs int ret; 498277177Srrs struct counters *partial; 499277177Srrs struct counters *unhalt; 500277177Srrs double un, par, res; 501277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 502277177Srrs partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 503277177Srrs if (pos != -1) { 504277177Srrs par = partial->vals[pos] * 1.0; 505277177Srrs un = unhalt->vals[pos] * 1.0; 506277177Srrs } else { 507277177Srrs par = partial->sum * 1.0; 508277177Srrs un = unhalt->sum * 1.0; 509277177Srrs } 510277177Srrs res = par/un; 511277177Srrs ret = printf("%1.3f", res); 512277177Srrs return(ret); 513277177Srrs} 514277177Srrs 515277177Srrsstatic int 516277177Srrsbr_mispredict(struct counters *cpu, int pos) 517277177Srrs{ 518277177Srrs struct counters *brctr; 519277177Srrs struct counters *unhalt; 520277177Srrs int ret; 521277177Srrs/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 522277177Srrs double br, un, con, res; 523277177Srrs con = 20.0; 524277177Srrs 525277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 526277177Srrs brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 527277177Srrs if (pos != -1) { 528277177Srrs br = brctr->vals[pos] * 1.0; 529277177Srrs un = unhalt->vals[pos] * 1.0; 530277177Srrs } else { 531277177Srrs br = brctr->sum * 1.0; 532277177Srrs un = unhalt->sum * 1.0; 533277177Srrs } 534277177Srrs res = (con * br)/un; 535277177Srrs ret = printf("%1.3f", res); 536277177Srrs return(ret); 537277177Srrs} 538277177Srrs 539277177Srrsstatic int 540277177Srrsbr_mispredictib(struct counters *cpu, int pos) 541277177Srrs{ 542277177Srrs struct counters *brctr; 543277177Srrs struct counters *unhalt; 544277177Srrs struct counters *clear, *clear2, *clear3; 545277177Srrs struct counters *uops; 546277177Srrs struct counters *recv; 547277177Srrs struct counters *iss; 548277177Srrs/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 549277177Srrs int ret; 550277177Srrs /* 551277177Srrs * (BR_MISP_RETIRED.ALL_BRANCHES / 552277177Srrs * (BR_MISP_RETIRED.ALL_BRANCHES + 553277177Srrs * MACHINE_CLEAR.COUNT) * 554277177Srrs * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 555277177Srrs * 556277177Srrs */ 557277177Srrs double br, cl, cl2, cl3, uo, re, un, con, res, is; 558277177Srrs con = 4.0; 559277177Srrs 560277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 561277177Srrs brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 562277177Srrs clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 563277177Srrs clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 564277177Srrs clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 565277177Srrs uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 566277177Srrs iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 567277177Srrs recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 568277177Srrs if (pos != -1) { 569277177Srrs br = brctr->vals[pos] * 1.0; 570277177Srrs cl = clear->vals[pos] * 1.0; 571277177Srrs cl2 = clear2->vals[pos] * 1.0; 572277177Srrs cl3 = clear3->vals[pos] * 1.0; 573277177Srrs uo = uops->vals[pos] * 1.0; 574277177Srrs re = recv->vals[pos] * 1.0; 575277177Srrs is = iss->vals[pos] * 1.0; 576277177Srrs un = unhalt->vals[pos] * 1.0; 577277177Srrs } else { 578277177Srrs br = brctr->sum * 1.0; 579277177Srrs cl = clear->sum * 1.0; 580277177Srrs cl2 = clear2->sum * 1.0; 581277177Srrs cl3 = clear3->sum * 1.0; 582277177Srrs uo = uops->sum * 1.0; 583277177Srrs re = recv->sum * 1.0; 584277177Srrs is = iss->sum * 1.0; 585277177Srrs un = unhalt->sum * 1.0; 586277177Srrs } 587277177Srrs res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 588277177Srrs ret = printf("%1.3f", res); 589277177Srrs return(ret); 590277177Srrs} 591277177Srrs 592277177Srrsstatic int 593277177Srrssplitloadib(struct counters *cpu, int pos) 594277177Srrs{ 595277177Srrs int ret; 596277177Srrs struct counters *mem; 597277177Srrs struct counters *l1d, *ldblock; 598277177Srrs struct counters *unhalt; 599277177Srrs double un, memd, res, l1, ldb; 600277177Srrs /* 601277177Srrs * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 602277177Srrs * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 603277177Srrs */ 604277177Srrs 605277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 606277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 607277177Srrs l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 608277177Srrs ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 609277177Srrs if (pos != -1) { 610277177Srrs memd = mem->vals[pos] * 1.0; 611277177Srrs l1 = l1d->vals[pos] * 1.0; 612277177Srrs ldb = ldblock->vals[pos] * 1.0; 613277177Srrs un = unhalt->vals[pos] * 1.0; 614277177Srrs } else { 615277177Srrs memd = mem->sum * 1.0; 616277177Srrs l1 = l1d->sum * 1.0; 617277177Srrs ldb = ldblock->sum * 1.0; 618277177Srrs un = unhalt->sum * 1.0; 619277177Srrs } 620277177Srrs res = ((l1 / memd) * ldb)/un; 621277177Srrs ret = printf("%1.3f", res); 622277177Srrs return(ret); 623277177Srrs} 624277177Srrs 625277177Srrsstatic int 626277177Srrssplitload(struct counters *cpu, int pos) 627277177Srrs{ 628277177Srrs int ret; 629277177Srrs struct counters *mem; 630277177Srrs struct counters *unhalt; 631277177Srrs double con, un, memd, res; 632277177Srrs/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 633277177Srrs 634277177Srrs con = 5.0; 635277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 636277177Srrs mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 637277177Srrs if (pos != -1) { 638277177Srrs memd = mem->vals[pos] * 1.0; 639277177Srrs un = unhalt->vals[pos] * 1.0; 640277177Srrs } else { 641277177Srrs memd = mem->sum * 1.0; 642277177Srrs un = unhalt->sum * 1.0; 643277177Srrs } 644277177Srrs res = (memd * con)/un; 645277177Srrs ret = printf("%1.3f", res); 646277177Srrs return(ret); 647277177Srrs} 648277177Srrs 649277177Srrsstatic int 650277177Srrssplitstore(struct counters *cpu, int pos) 651277177Srrs{ 652277177Srrs /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 653277177Srrs int ret; 654277177Srrs struct counters *mem_split; 655277177Srrs struct counters *mem_stores; 656277177Srrs double memsplit, memstore, res; 657277177Srrs mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 658277177Srrs mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 659277177Srrs if (pos != -1) { 660277177Srrs memsplit = mem_split->vals[pos] * 1.0; 661277177Srrs memstore = mem_stores->vals[pos] * 1.0; 662277177Srrs } else { 663277177Srrs memsplit = mem_split->sum * 1.0; 664277177Srrs memstore = mem_stores->sum * 1.0; 665277177Srrs } 666277177Srrs res = memsplit/memstore; 667277177Srrs ret = printf("%1.3f", res); 668277177Srrs return(ret); 669277177Srrs} 670277177Srrs 671277177Srrs 672277177Srrsstatic int 673277177Srrscontested(struct counters *cpu, int pos) 674277177Srrs{ 675277177Srrs /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 676277177Srrs int ret; 677277177Srrs struct counters *mem; 678277177Srrs struct counters *unhalt; 679277177Srrs double con, un, memd, res; 680277177Srrs 681277177Srrs con = 60.0; 682277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 683277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 684277177Srrs if (pos != -1) { 685277177Srrs memd = mem->vals[pos] * 1.0; 686277177Srrs un = unhalt->vals[pos] * 1.0; 687277177Srrs } else { 688277177Srrs memd = mem->sum * 1.0; 689277177Srrs un = unhalt->sum * 1.0; 690277177Srrs } 691277177Srrs res = (memd * con)/un; 692277177Srrs ret = printf("%1.3f", res); 693277177Srrs return(ret); 694277177Srrs} 695277177Srrs 696277177Srrsstatic int 697277177Srrscontested_has(struct counters *cpu, int pos) 698277177Srrs{ 699277177Srrs /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 700277177Srrs int ret; 701277177Srrs struct counters *mem; 702277177Srrs struct counters *unhalt; 703277177Srrs double con, un, memd, res; 704277177Srrs 705277177Srrs con = 84.0; 706277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 707277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 708277177Srrs if (pos != -1) { 709277177Srrs memd = mem->vals[pos] * 1.0; 710277177Srrs un = unhalt->vals[pos] * 1.0; 711277177Srrs } else { 712277177Srrs memd = mem->sum * 1.0; 713277177Srrs un = unhalt->sum * 1.0; 714277177Srrs } 715277177Srrs res = (memd * con)/un; 716277177Srrs ret = printf("%1.3f", res); 717277177Srrs return(ret); 718277177Srrs} 719277177Srrs 720277177Srrs 721277177Srrsstatic int 722277177Srrsblockstoreforward(struct counters *cpu, int pos) 723277177Srrs{ 724277177Srrs /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 725277177Srrs int ret; 726277177Srrs struct counters *ldb; 727277177Srrs struct counters *unhalt; 728277177Srrs double con, un, ld, res; 729277177Srrs 730277177Srrs con = 13.0; 731277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 732277177Srrs ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 733277177Srrs if (pos != -1) { 734277177Srrs ld = ldb->vals[pos] * 1.0; 735277177Srrs un = unhalt->vals[pos] * 1.0; 736277177Srrs } else { 737277177Srrs ld = ldb->sum * 1.0; 738277177Srrs un = unhalt->sum * 1.0; 739277177Srrs } 740277177Srrs res = (ld * con)/un; 741277177Srrs ret = printf("%1.3f", res); 742277177Srrs return(ret); 743277177Srrs} 744277177Srrs 745277177Srrsstatic int 746277177Srrscache2(struct counters *cpu, int pos) 747277177Srrs{ 748277177Srrs /* ** Suspect *** 749277177Srrs * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 750277177Srrs * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 751277177Srrs */ 752277177Srrs int ret; 753277177Srrs struct counters *mem1, *mem2, *mem3; 754277177Srrs struct counters *unhalt; 755277177Srrs double con1, con2, con3, un, me_1, me_2, me_3, res; 756277177Srrs 757277177Srrs con1 = 26.0; 758277177Srrs con2 = 43.0; 759277177Srrs con3 = 60.0; 760277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 761277177Srrs/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 762277177Srrs mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 763277177Srrs mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 764277177Srrs mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 765277177Srrs if (pos != -1) { 766277177Srrs me_1 = mem1->vals[pos] * 1.0; 767277177Srrs me_2 = mem2->vals[pos] * 1.0; 768277177Srrs me_3 = mem3->vals[pos] * 1.0; 769277177Srrs un = unhalt->vals[pos] * 1.0; 770277177Srrs } else { 771277177Srrs me_1 = mem1->sum * 1.0; 772277177Srrs me_2 = mem2->sum * 1.0; 773277177Srrs me_3 = mem3->sum * 1.0; 774277177Srrs un = unhalt->sum * 1.0; 775277177Srrs } 776277177Srrs res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 777277177Srrs ret = printf("%1.3f", res); 778277177Srrs return(ret); 779277177Srrs} 780277177Srrs 781277177Srrsstatic int 782277177Srrsdatasharing(struct counters *cpu, int pos) 783277177Srrs{ 784277177Srrs /* 785277177Srrs * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 786277177Srrs */ 787277177Srrs int ret; 788277177Srrs struct counters *mem; 789277177Srrs struct counters *unhalt; 790277177Srrs double con, res, me, un; 791277177Srrs 792277177Srrs con = 43.0; 793277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 794277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 795277177Srrs if (pos != -1) { 796277177Srrs me = mem->vals[pos] * 1.0; 797277177Srrs un = unhalt->vals[pos] * 1.0; 798277177Srrs } else { 799277177Srrs me = mem->sum * 1.0; 800277177Srrs un = unhalt->sum * 1.0; 801277177Srrs } 802277177Srrs res = (me * con)/un; 803277177Srrs ret = printf("%1.3f", res); 804277177Srrs return(ret); 805277177Srrs 806277177Srrs} 807277177Srrs 808277177Srrs 809277177Srrsstatic int 810277177Srrsdatasharing_has(struct counters *cpu, int pos) 811277177Srrs{ 812277177Srrs /* 813277177Srrs * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 814277177Srrs */ 815277177Srrs int ret; 816277177Srrs struct counters *mem; 817277177Srrs struct counters *unhalt; 818277177Srrs double con, res, me, un; 819277177Srrs 820277177Srrs con = 72.0; 821277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 822277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 823277177Srrs if (pos != -1) { 824277177Srrs me = mem->vals[pos] * 1.0; 825277177Srrs un = unhalt->vals[pos] * 1.0; 826277177Srrs } else { 827277177Srrs me = mem->sum * 1.0; 828277177Srrs un = unhalt->sum * 1.0; 829277177Srrs } 830277177Srrs res = (me * con)/un; 831277177Srrs ret = printf("%1.3f", res); 832277177Srrs return(ret); 833277177Srrs 834277177Srrs} 835277177Srrs 836277177Srrs 837277177Srrsstatic int 838277177Srrscache2ib(struct counters *cpu, int pos) 839277177Srrs{ 840277177Srrs /* 841277177Srrs * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 842277177Srrs */ 843277177Srrs int ret; 844277177Srrs struct counters *mem; 845277177Srrs struct counters *unhalt; 846277177Srrs double con, un, me, res; 847277177Srrs 848277177Srrs con = 29.0; 849277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 850277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 851277177Srrs if (pos != -1) { 852277177Srrs me = mem->vals[pos] * 1.0; 853277177Srrs un = unhalt->vals[pos] * 1.0; 854277177Srrs } else { 855277177Srrs me = mem->sum * 1.0; 856277177Srrs un = unhalt->sum * 1.0; 857277177Srrs } 858277177Srrs res = (con * me)/un; 859277177Srrs ret = printf("%1.3f", res); 860277177Srrs return(ret); 861277177Srrs} 862277177Srrs 863277177Srrsstatic int 864277177Srrscache2has(struct counters *cpu, int pos) 865277177Srrs{ 866277177Srrs /* 867277177Srrs * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 868277177Srrs * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 869277177Srrs * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 870277177Srrs * / CPU_CLK_UNHALTED.THREAD_P 871277177Srrs */ 872277177Srrs int ret; 873277177Srrs struct counters *mem1, *mem2, *mem3; 874277177Srrs struct counters *unhalt; 875277177Srrs double con1, con2, con3, un, me1, me2, me3, res; 876277177Srrs 877277177Srrs con1 = 36.0; 878277177Srrs con2 = 72.0; 879277177Srrs con3 = 84.0; 880277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 881277177Srrs mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 882277177Srrs mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 883277177Srrs mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 884277177Srrs if (pos != -1) { 885277177Srrs me1 = mem1->vals[pos] * 1.0; 886277177Srrs me2 = mem2->vals[pos] * 1.0; 887277177Srrs me3 = mem3->vals[pos] * 1.0; 888277177Srrs un = unhalt->vals[pos] * 1.0; 889277177Srrs } else { 890277177Srrs me1 = mem1->sum * 1.0; 891277177Srrs me2 = mem2->sum * 1.0; 892277177Srrs me3 = mem3->sum * 1.0; 893277177Srrs un = unhalt->sum * 1.0; 894277177Srrs } 895277177Srrs res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 896277177Srrs ret = printf("%1.3f", res); 897277177Srrs return(ret); 898277177Srrs} 899277177Srrs 900277177Srrsstatic int 901277177Srrscache1(struct counters *cpu, int pos) 902277177Srrs{ 903277177Srrs /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 904277177Srrs int ret; 905277177Srrs struct counters *mem; 906277177Srrs struct counters *unhalt; 907277177Srrs double con, un, me, res; 908277177Srrs 909277177Srrs con = 180.0; 910277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 911277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 912277177Srrs if (pos != -1) { 913277177Srrs me = mem->vals[pos] * 1.0; 914277177Srrs un = unhalt->vals[pos] * 1.0; 915277177Srrs } else { 916277177Srrs me = mem->sum * 1.0; 917277177Srrs un = unhalt->sum * 1.0; 918277177Srrs } 919277177Srrs res = (me * con)/un; 920277177Srrs ret = printf("%1.3f", res); 921277177Srrs return(ret); 922277177Srrs} 923277177Srrs 924277177Srrsstatic int 925277177Srrscache1ib(struct counters *cpu, int pos) 926277177Srrs{ 927277177Srrs /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 928277177Srrs int ret; 929277177Srrs struct counters *mem; 930277177Srrs struct counters *unhalt; 931277177Srrs double con, un, me, res; 932277177Srrs 933277177Srrs con = 180.0; 934277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 935277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 936277177Srrs if (pos != -1) { 937277177Srrs me = mem->vals[pos] * 1.0; 938277177Srrs un = unhalt->vals[pos] * 1.0; 939277177Srrs } else { 940277177Srrs me = mem->sum * 1.0; 941277177Srrs un = unhalt->sum * 1.0; 942277177Srrs } 943277177Srrs res = (me * con)/un; 944277177Srrs ret = printf("%1.3f", res); 945277177Srrs return(ret); 946277177Srrs} 947277177Srrs 948277177Srrs 949277177Srrsstatic int 950277177Srrsdtlb_missload(struct counters *cpu, int pos) 951277177Srrs{ 952277177Srrs /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 953277177Srrs int ret; 954277177Srrs struct counters *dtlb_m, *dtlb_d; 955277177Srrs struct counters *unhalt; 956277177Srrs double con, un, d1, d2, res; 957277177Srrs 958277177Srrs con = 7.0; 959277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 960277177Srrs dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 961277177Srrs dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 962277177Srrs if (pos != -1) { 963277177Srrs d1 = dtlb_m->vals[pos] * 1.0; 964277177Srrs d2 = dtlb_d->vals[pos] * 1.0; 965277177Srrs un = unhalt->vals[pos] * 1.0; 966277177Srrs } else { 967277177Srrs d1 = dtlb_m->sum * 1.0; 968277177Srrs d2 = dtlb_d->sum * 1.0; 969277177Srrs un = unhalt->sum * 1.0; 970277177Srrs } 971277177Srrs res = ((d1 * con) + d2)/un; 972277177Srrs ret = printf("%1.3f", res); 973277177Srrs return(ret); 974277177Srrs} 975277177Srrs 976277177Srrsstatic int 977277177Srrsdtlb_missstore(struct counters *cpu, int pos) 978277177Srrs{ 979277177Srrs /* 980277177Srrs * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 981277177Srrs * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 982277177Srrs */ 983277177Srrs int ret; 984277177Srrs struct counters *dtsb_m, *dtsb_d; 985277177Srrs struct counters *unhalt; 986277177Srrs double con, un, d1, d2, res; 987277177Srrs 988277177Srrs con = 7.0; 989277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 990277177Srrs dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 991277177Srrs dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 992277177Srrs if (pos != -1) { 993277177Srrs d1 = dtsb_m->vals[pos] * 1.0; 994277177Srrs d2 = dtsb_d->vals[pos] * 1.0; 995277177Srrs un = unhalt->vals[pos] * 1.0; 996277177Srrs } else { 997277177Srrs d1 = dtsb_m->sum * 1.0; 998277177Srrs d2 = dtsb_d->sum * 1.0; 999277177Srrs un = unhalt->sum * 1.0; 1000277177Srrs } 1001277177Srrs res = ((d1 * con) + d2)/un; 1002277177Srrs ret = printf("%1.3f", res); 1003277177Srrs return(ret); 1004277177Srrs} 1005277177Srrs 1006277177Srrsstatic int 1007277177Srrsitlb_miss(struct counters *cpu, int pos) 1008277177Srrs{ 1009277177Srrs /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1010277177Srrs int ret; 1011277177Srrs struct counters *itlb; 1012277177Srrs struct counters *unhalt; 1013277177Srrs double un, d1, res; 1014277177Srrs 1015277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1016277177Srrs itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1017277177Srrs if (pos != -1) { 1018277177Srrs d1 = itlb->vals[pos] * 1.0; 1019277177Srrs un = unhalt->vals[pos] * 1.0; 1020277177Srrs } else { 1021277177Srrs d1 = itlb->sum * 1.0; 1022277177Srrs un = unhalt->sum * 1.0; 1023277177Srrs } 1024277177Srrs res = d1/un; 1025277177Srrs ret = printf("%1.3f", res); 1026277177Srrs return(ret); 1027277177Srrs} 1028277177Srrs 1029277177Srrsstatic int 1030277177Srrsicache_miss(struct counters *cpu, int pos) 1031277177Srrs{ 1032277177Srrs /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1033277177Srrs 1034277177Srrs int ret; 1035277177Srrs struct counters *itlb, *icache; 1036277177Srrs struct counters *unhalt; 1037277177Srrs double un, d1, ic, res; 1038277177Srrs 1039277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1040277177Srrs itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1041277177Srrs icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1042277177Srrs if (pos != -1) { 1043277177Srrs d1 = itlb->vals[pos] * 1.0; 1044277177Srrs ic = icache->vals[pos] * 1.0; 1045277177Srrs un = unhalt->vals[pos] * 1.0; 1046277177Srrs } else { 1047277177Srrs d1 = itlb->sum * 1.0; 1048277177Srrs ic = icache->sum * 1.0; 1049277177Srrs un = unhalt->sum * 1.0; 1050277177Srrs } 1051277177Srrs res = (ic-d1)/un; 1052277177Srrs ret = printf("%1.3f", res); 1053277177Srrs return(ret); 1054277177Srrs 1055277177Srrs} 1056277177Srrs 1057277177Srrsstatic int 1058277177Srrsicache_miss_has(struct counters *cpu, int pos) 1059277177Srrs{ 1060277177Srrs /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1061277177Srrs 1062277177Srrs int ret; 1063277177Srrs struct counters *icache; 1064277177Srrs struct counters *unhalt; 1065277177Srrs double un, con, ic, res; 1066277177Srrs 1067277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1068277177Srrs icache = find_counter(cpu, "ICACHE.MISSES"); 1069277177Srrs con = 36.0; 1070277177Srrs if (pos != -1) { 1071277177Srrs ic = icache->vals[pos] * 1.0; 1072277177Srrs un = unhalt->vals[pos] * 1.0; 1073277177Srrs } else { 1074277177Srrs ic = icache->sum * 1.0; 1075277177Srrs un = unhalt->sum * 1.0; 1076277177Srrs } 1077277177Srrs res = (con * ic)/un; 1078277177Srrs ret = printf("%1.3f", res); 1079277177Srrs return(ret); 1080277177Srrs 1081277177Srrs} 1082277177Srrs 1083277177Srrsstatic int 1084277177Srrslcp_stall(struct counters *cpu, int pos) 1085277177Srrs{ 1086277177Srrs /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1087277177Srrs int ret; 1088277177Srrs struct counters *ild; 1089277177Srrs struct counters *unhalt; 1090277177Srrs double un, d1, res; 1091277177Srrs 1092277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1093277177Srrs ild = find_counter(cpu, "ILD_STALL.LCP"); 1094277177Srrs if (pos != -1) { 1095277177Srrs d1 = ild->vals[pos] * 1.0; 1096277177Srrs un = unhalt->vals[pos] * 1.0; 1097277177Srrs } else { 1098277177Srrs d1 = ild->sum * 1.0; 1099277177Srrs un = unhalt->sum * 1.0; 1100277177Srrs } 1101277177Srrs res = d1/un; 1102277177Srrs ret = printf("%1.3f", res); 1103277177Srrs return(ret); 1104277177Srrs 1105277177Srrs} 1106277177Srrs 1107277177Srrs 1108277177Srrsstatic int 1109277177Srrsfrontendstall(struct counters *cpu, int pos) 1110277177Srrs{ 1111277177Srrs /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1112277177Srrs int ret; 1113277177Srrs struct counters *idq; 1114277177Srrs struct counters *unhalt; 1115277177Srrs double con, un, id, res; 1116277177Srrs 1117277177Srrs con = 4.0; 1118277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1119277177Srrs idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1120277177Srrs if (pos != -1) { 1121277177Srrs id = idq->vals[pos] * 1.0; 1122277177Srrs un = unhalt->vals[pos] * 1.0; 1123277177Srrs } else { 1124277177Srrs id = idq->sum * 1.0; 1125277177Srrs un = unhalt->sum * 1.0; 1126277177Srrs } 1127277177Srrs res = id/(un * con); 1128277177Srrs ret = printf("%1.3f", res); 1129277177Srrs return(ret); 1130277177Srrs} 1131277177Srrs 1132277177Srrsstatic int 1133277177Srrsclears(struct counters *cpu, int pos) 1134277177Srrs{ 1135277177Srrs /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1136277177Srrs * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1137277177Srrs 1138277177Srrs int ret; 1139277177Srrs struct counters *clr1, *clr2, *clr3; 1140277177Srrs struct counters *unhalt; 1141277177Srrs double con, un, cl1, cl2, cl3, res; 1142277177Srrs 1143277177Srrs con = 100.0; 1144277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1145277177Srrs clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1146277177Srrs clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1147277177Srrs clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1148277177Srrs 1149277177Srrs if (pos != -1) { 1150277177Srrs cl1 = clr1->vals[pos] * 1.0; 1151277177Srrs cl2 = clr2->vals[pos] * 1.0; 1152277177Srrs cl3 = clr3->vals[pos] * 1.0; 1153277177Srrs un = unhalt->vals[pos] * 1.0; 1154277177Srrs } else { 1155277177Srrs cl1 = clr1->sum * 1.0; 1156277177Srrs cl2 = clr2->sum * 1.0; 1157277177Srrs cl3 = clr3->sum * 1.0; 1158277177Srrs un = unhalt->sum * 1.0; 1159277177Srrs } 1160277177Srrs res = ((cl1 + cl2 + cl3) * con)/un; 1161277177Srrs ret = printf("%1.3f", res); 1162277177Srrs return(ret); 1163277177Srrs} 1164277177Srrs 1165277177Srrsstatic int 1166277177Srrsmicroassist(struct counters *cpu, int pos) 1167277177Srrs{ 1168277177Srrs /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1169277177Srrs int ret; 1170277177Srrs struct counters *idq; 1171277177Srrs struct counters *unhalt; 1172277177Srrs double un, id, res, con; 1173277177Srrs 1174277177Srrs con = 4.0; 1175277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1176277177Srrs idq = find_counter(cpu, "IDQ.MS_UOPS"); 1177277177Srrs if (pos != -1) { 1178277177Srrs id = idq->vals[pos] * 1.0; 1179277177Srrs un = unhalt->vals[pos] * 1.0; 1180277177Srrs } else { 1181277177Srrs id = idq->sum * 1.0; 1182277177Srrs un = unhalt->sum * 1.0; 1183277177Srrs } 1184277177Srrs res = id/(un * con); 1185277177Srrs ret = printf("%1.3f", res); 1186277177Srrs return(ret); 1187277177Srrs} 1188277177Srrs 1189277177Srrs 1190277177Srrsstatic int 1191277177Srrsaliasing(struct counters *cpu, int pos) 1192277177Srrs{ 1193277177Srrs /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1194277177Srrs int ret; 1195277177Srrs struct counters *ld; 1196277177Srrs struct counters *unhalt; 1197277177Srrs double un, lds, con, res; 1198277177Srrs 1199277177Srrs con = 5.0; 1200277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1201277177Srrs ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1202277177Srrs if (pos != -1) { 1203277177Srrs lds = ld->vals[pos] * 1.0; 1204277177Srrs un = unhalt->vals[pos] * 1.0; 1205277177Srrs } else { 1206277177Srrs lds = ld->sum * 1.0; 1207277177Srrs un = unhalt->sum * 1.0; 1208277177Srrs } 1209277177Srrs res = (lds * con)/un; 1210277177Srrs ret = printf("%1.3f", res); 1211277177Srrs return(ret); 1212277177Srrs} 1213277177Srrs 1214277177Srrsstatic int 1215277177Srrsfpassists(struct counters *cpu, int pos) 1216277177Srrs{ 1217277177Srrs /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1218277177Srrs int ret; 1219277177Srrs struct counters *fp; 1220277177Srrs struct counters *inst; 1221277177Srrs double un, fpd, res; 1222277177Srrs 1223277177Srrs inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1224277177Srrs fp = find_counter(cpu, "FP_ASSIST.ANY"); 1225277177Srrs if (pos != -1) { 1226277177Srrs fpd = fp->vals[pos] * 1.0; 1227277177Srrs un = inst->vals[pos] * 1.0; 1228277177Srrs } else { 1229277177Srrs fpd = fp->sum * 1.0; 1230277177Srrs un = inst->sum * 1.0; 1231277177Srrs } 1232277177Srrs res = fpd/un; 1233277177Srrs ret = printf("%1.3f", res); 1234277177Srrs return(ret); 1235277177Srrs} 1236277177Srrs 1237277177Srrsstatic int 1238277177Srrsotherassistavx(struct counters *cpu, int pos) 1239277177Srrs{ 1240277177Srrs /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1241277177Srrs int ret; 1242277177Srrs struct counters *oth; 1243277177Srrs struct counters *unhalt; 1244277177Srrs double un, ot, con, res; 1245277177Srrs 1246277177Srrs con = 75.0; 1247277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1248277177Srrs oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1249277177Srrs if (pos != -1) { 1250277177Srrs ot = oth->vals[pos] * 1.0; 1251277177Srrs un = unhalt->vals[pos] * 1.0; 1252277177Srrs } else { 1253277177Srrs ot = oth->sum * 1.0; 1254277177Srrs un = unhalt->sum * 1.0; 1255277177Srrs } 1256277177Srrs res = (ot * con)/un; 1257277177Srrs ret = printf("%1.3f", res); 1258277177Srrs return(ret); 1259277177Srrs} 1260277177Srrs 1261277177Srrsstatic int 1262277177Srrsotherassistsse(struct counters *cpu, int pos) 1263277177Srrs{ 1264277177Srrs 1265277177Srrs int ret; 1266277177Srrs struct counters *oth; 1267277177Srrs struct counters *unhalt; 1268277177Srrs double un, ot, con, res; 1269277177Srrs 1270277177Srrs /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1271277177Srrs con = 75.0; 1272277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1273277177Srrs oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1274277177Srrs if (pos != -1) { 1275277177Srrs ot = oth->vals[pos] * 1.0; 1276277177Srrs un = unhalt->vals[pos] * 1.0; 1277277177Srrs } else { 1278277177Srrs ot = oth->sum * 1.0; 1279277177Srrs un = unhalt->sum * 1.0; 1280277177Srrs } 1281277177Srrs res = (ot * con)/un; 1282277177Srrs ret = printf("%1.3f", res); 1283277177Srrs return(ret); 1284277177Srrs} 1285277177Srrs 1286277177Srrsstatic int 1287277177Srrsefficiency1(struct counters *cpu, int pos) 1288277177Srrs{ 1289277177Srrs 1290277177Srrs int ret; 1291277177Srrs struct counters *uops; 1292277177Srrs struct counters *unhalt; 1293277177Srrs double un, ot, con, res; 1294277177Srrs 1295277177Srrs /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1296277177Srrs con = 4.0; 1297277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1298277177Srrs uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1299277177Srrs if (pos != -1) { 1300277177Srrs ot = uops->vals[pos] * 1.0; 1301277177Srrs un = unhalt->vals[pos] * 1.0; 1302277177Srrs } else { 1303277177Srrs ot = uops->sum * 1.0; 1304277177Srrs un = unhalt->sum * 1.0; 1305277177Srrs } 1306277177Srrs res = ot/(con * un); 1307277177Srrs ret = printf("%1.3f", res); 1308277177Srrs return(ret); 1309277177Srrs} 1310277177Srrs 1311277177Srrsstatic int 1312277177Srrsefficiency2(struct counters *cpu, int pos) 1313277177Srrs{ 1314277177Srrs 1315277177Srrs int ret; 1316277177Srrs struct counters *uops; 1317277177Srrs struct counters *unhalt; 1318277177Srrs double un, ot, res; 1319277177Srrs 1320277177Srrs /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1321277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1322277177Srrs uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1323277177Srrs if (pos != -1) { 1324277177Srrs ot = uops->vals[pos] * 1.0; 1325277177Srrs un = unhalt->vals[pos] * 1.0; 1326277177Srrs } else { 1327277177Srrs ot = uops->sum * 1.0; 1328277177Srrs un = unhalt->sum * 1.0; 1329277177Srrs } 1330277177Srrs res = un/ot; 1331277177Srrs ret = printf("%1.3f", res); 1332277177Srrs return(ret); 1333277177Srrs} 1334277177Srrs 1335277177Srrs#define SANDY_BRIDGE_COUNT 20 1336277177Srrsstatic struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1337277177Srrs/*01*/ { "allocstall1", "thresh > .05", 1338277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1339277177Srrs allocstall1 }, 1340277177Srrs/*02*/ { "allocstall2", "thresh > .05", 1341277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", 1342277177Srrs allocstall2 }, 1343277177Srrs/*03*/ { "br_miss", "thresh >= .2", 1344277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1345277177Srrs br_mispredict }, 1346277177Srrs/*04*/ { "splitload", "thresh >= .1", 1347277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1348277177Srrs splitload }, 1349277177Srrs/*05*/ { "splitstore", "thresh >= .01", 1350277177Srrs "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1351277177Srrs splitstore }, 1352277177Srrs/*06*/ { "contested", "thresh >= .05", 1353277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1354277177Srrs contested }, 1355277177Srrs/*07*/ { "blockstorefwd", "thresh >= .05", 1356277177Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1357277177Srrs blockstoreforward }, 1358277177Srrs/*08*/ { "cache2", "thresh >= .2", 1359277177Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1360277177Srrs cache2 }, 1361277177Srrs/*09*/ { "cache1", "thresh >= .2", 1362277177Srrs "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1363277177Srrs cache1 }, 1364277177Srrs/*10*/ { "dtlbmissload", "thresh >= .1", 1365277177Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1366277177Srrs dtlb_missload }, 1367277177Srrs/*11*/ { "dtlbmissstore", "thresh >= .05", 1368277177Srrs "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1369277177Srrs dtlb_missstore }, 1370277177Srrs/*12*/ { "frontendstall", "thresh >= .15", 1371277177Srrs "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1372277177Srrs frontendstall }, 1373277177Srrs/*13*/ { "clears", "thresh >= .02", 1374277177Srrs "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1375277177Srrs clears }, 1376277177Srrs/*14*/ { "microassist", "thresh >= .05", 1377277177Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1378277177Srrs microassist }, 1379277177Srrs/*15*/ { "aliasing_4k", "thresh >= .1", 1380277177Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1381277177Srrs aliasing }, 1382277177Srrs/*16*/ { "fpassist", "look for a excessive value", 1383277177Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1384277177Srrs fpassists }, 1385277177Srrs/*17*/ { "otherassistavx", "look for a excessive value", 1386277177Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1387277177Srrs otherassistavx }, 1388277177Srrs/*18*/ { "otherassistsse", "look for a excessive value", 1389277177Srrs "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1390277177Srrs otherassistsse }, 1391277177Srrs/*19*/ { "eff1", "thresh < .9", 1392277177Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1393277177Srrs efficiency1 }, 1394277177Srrs/*20*/ { "eff2", "thresh > 1.0", 1395277177Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1396277177Srrs efficiency2 }, 1397277177Srrs}; 1398277177Srrs 1399277177Srrs 1400277177Srrs#define IVY_BRIDGE_COUNT 21 1401277177Srrsstatic struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1402277177Srrs/*1*/ { "eff1", "thresh < .75", 1403277177Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1404277177Srrs efficiency1 }, 1405277177Srrs/*2*/ { "eff2", "thresh > 1.0", 1406277177Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1407277177Srrs efficiency2 }, 1408277177Srrs/*3*/ { "itlbmiss", "thresh > .05", 1409277177Srrs "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1410277177Srrs itlb_miss }, 1411277177Srrs/*4*/ { "icachemiss", "thresh > .05", 1412277177Srrs "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1413277177Srrs icache_miss }, 1414277177Srrs/*5*/ { "lcpstall", "thresh > .05", 1415277177Srrs "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1416277177Srrs lcp_stall }, 1417277177Srrs/*6*/ { "cache1", "thresh >= .2", 1418277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1419277177Srrs cache1ib }, 1420277177Srrs/*7*/ { "cache2", "thresh >= .2", 1421277177Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1422277177Srrs cache2ib }, 1423277177Srrs/*8*/ { "contested", "thresh >= .05", 1424277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1425277177Srrs contested }, 1426277177Srrs/*9*/ { "datashare", "thresh >= .05", 1427277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1428277177Srrs datasharing }, 1429277177Srrs/*10*/ { "blockstorefwd", "thresh >= .05", 1430277177Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1431277177Srrs blockstoreforward }, 1432277177Srrs/*11*/ { "splitload", "thresh >= .1", 1433277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1434277177Srrs splitloadib }, 1435277177Srrs/*12*/ { "splitstore", "thresh >= .01", 1436277177Srrs "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1437277177Srrs splitstore }, 1438277177Srrs/*13*/ { "aliasing_4k", "thresh >= .1", 1439277177Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1440277177Srrs aliasing }, 1441277177Srrs/*14*/ { "dtlbmissload", "thresh >= .1", 1442277177Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1443277177Srrs dtlb_missload }, 1444277177Srrs/*15*/ { "dtlbmissstore", "thresh >= .05", 1445277177Srrs "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1446277177Srrs dtlb_missstore }, 1447277177Srrs/*16*/ { "br_miss", "thresh >= .2", 1448277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1449277177Srrs br_mispredictib }, 1450277177Srrs/*17*/ { "clears", "thresh >= .02", 1451277177Srrs "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1452277177Srrs clears }, 1453277177Srrs/*18*/ { "microassist", "thresh >= .05", 1454277177Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1455277177Srrs microassist }, 1456277177Srrs/*19*/ { "fpassist", "look for a excessive value", 1457277177Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1458277177Srrs fpassists }, 1459277177Srrs/*20*/ { "otherassistavx", "look for a excessive value", 1460277177Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1461277177Srrs otherassistavx }, 1462277177Srrs/*21*/ { "otherassistsse", "look for a excessive value", 1463277177Srrs "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1464277177Srrs otherassistsse }, 1465277177Srrs}; 1466277177Srrs 1467277177Srrs#define HASWELL_COUNT 20 1468277177Srrsstatic struct cpu_entry haswell[HASWELL_COUNT] = { 1469277177Srrs/*1*/ { "eff1", "thresh < .75", 1470277177Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1471277177Srrs efficiency1 }, 1472277177Srrs/*2*/ { "eff2", "thresh > 1.0", 1473277177Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1474277177Srrs efficiency2 }, 1475277177Srrs/*3*/ { "itlbmiss", "thresh > .05", 1476277177Srrs "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1477277177Srrs itlb_miss }, 1478277177Srrs/*4*/ { "icachemiss", "thresh > .05", 1479277177Srrs "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", 1480277177Srrs icache_miss_has }, 1481277177Srrs/*5*/ { "lcpstall", "thresh > .05", 1482277177Srrs "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1483277177Srrs lcp_stall }, 1484277177Srrs/*6*/ { "cache1", "thresh >= .2", 1485277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1486277177Srrs cache1ib }, 1487277177Srrs/*7*/ { "cache2", "thresh >= .2", 1488277177Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1489277177Srrs cache2has }, 1490277177Srrs/*8*/ { "contested", "thresh >= .05", 1491277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1492277177Srrs contested_has }, 1493277177Srrs/*9*/ { "datashare", "thresh >= .05", 1494277177Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1495277177Srrs datasharing_has }, 1496277177Srrs/*10*/ { "blockstorefwd", "thresh >= .05", 1497277177Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1498277177Srrs blockstoreforward }, 1499277177Srrs/*11*/ { "splitload", "thresh >= .1", 1500277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1501277177Srrs splitload }, 1502277177Srrs/*12*/ { "splitstore", "thresh >= .01", 1503277177Srrs "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1504277177Srrs splitstore }, 1505277177Srrs/*13*/ { "aliasing_4k", "thresh >= .1", 1506277177Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1507277177Srrs aliasing }, 1508277177Srrs/*14*/ { "dtlbmissload", "thresh >= .1", 1509277177Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1510277177Srrs dtlb_missload }, 1511277177Srrs/*15*/ { "br_miss", "thresh >= .2", 1512277177Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1513277177Srrs br_mispredict }, 1514277177Srrs/*16*/ { "clears", "thresh >= .02", 1515277177Srrs "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1516277177Srrs clears }, 1517277177Srrs/*17*/ { "microassist", "thresh >= .05", 1518277177Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1519277177Srrs microassist }, 1520277177Srrs/*18*/ { "fpassist", "look for a excessive value", 1521277177Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1522277177Srrs fpassists }, 1523277177Srrs/*19*/ { "otherassistavx", "look for a excessive value", 1524277177Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1525277177Srrs otherassistavx }, 1526277177Srrs/*20*/ { "otherassistsse", "look for a excessive value", 1527277177Srrs "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1528277177Srrs otherassistsse }, 1529277177Srrs}; 1530277177Srrs 1531277177Srrs 1532277177Srrsstatic void 1533277177Srrsset_sandybridge(void) 1534277177Srrs{ 1535277177Srrs strcpy(the_cpu.cputype, "SandyBridge PMC"); 1536277177Srrs the_cpu.number = SANDY_BRIDGE_COUNT; 1537277177Srrs the_cpu.ents = sandy_bridge; 1538277177Srrs the_cpu.explain = explain_name_sb; 1539277177Srrs} 1540277177Srrs 1541277177Srrsstatic void 1542277177Srrsset_ivybridge(void) 1543277177Srrs{ 1544277177Srrs strcpy(the_cpu.cputype, "IvyBridge PMC"); 1545277177Srrs the_cpu.number = IVY_BRIDGE_COUNT; 1546277177Srrs the_cpu.ents = ivy_bridge; 1547277177Srrs the_cpu.explain = explain_name_ib; 1548277177Srrs} 1549277177Srrs 1550277177Srrs 1551277177Srrsstatic void 1552277177Srrsset_haswell(void) 1553277177Srrs{ 1554277177Srrs strcpy(the_cpu.cputype, "HASWELL PMC"); 1555277177Srrs the_cpu.number = HASWELL_COUNT; 1556277177Srrs the_cpu.ents = haswell; 1557277177Srrs the_cpu.explain = explain_name_has; 1558277177Srrs} 1559277177Srrs 1560277177Srrsstatic void 1561277177Srrsset_expression(char *name) 1562277177Srrs{ 1563277177Srrs int found = 0, i; 1564277177Srrs for(i=0 ; i< the_cpu.number; i++) { 1565277177Srrs if (strcmp(name, the_cpu.ents[i].name) == 0) { 1566277177Srrs found = 1; 1567277177Srrs expression = the_cpu.ents[i].func; 1568277177Srrs command = the_cpu.ents[i].command; 1569277177Srrs threshold = the_cpu.ents[i].thresh; 1570277177Srrs break; 1571277177Srrs } 1572277177Srrs } 1573277177Srrs if (!found) { 1574277177Srrs printf("For CPU type %s we have no expression:%s\n", 1575277177Srrs the_cpu.cputype, name); 1576277177Srrs exit(-1); 1577277177Srrs } 1578277177Srrs} 1579277177Srrs 1580277177Srrs 1581277177Srrs 1582277177Srrs 1583277177Srrs 1584277177Srrsstatic int 1585277177Srrsvalidate_expression(char *name) 1586277177Srrs{ 1587277177Srrs int i, found; 1588277177Srrs 1589277177Srrs found = 0; 1590277177Srrs for(i=0 ; i< the_cpu.number; i++) { 1591277177Srrs if (strcmp(name, the_cpu.ents[i].name) == 0) { 1592277177Srrs found = 1; 1593277177Srrs break; 1594277177Srrs } 1595277177Srrs } 1596277177Srrs if (!found) { 1597277177Srrs return(-1); 1598277177Srrs } 1599277177Srrs return (0); 1600277177Srrs} 1601277177Srrs 1602277177Srrsstatic void 1603277177Srrsdo_expression(struct counters *cpu, int pos) 1604277177Srrs{ 1605277177Srrs if (expression == NULL) 1606277177Srrs return; 1607277177Srrs (*expression)(cpu, pos); 1608277177Srrs} 1609277177Srrs 1610277177Srrsstatic void 1611277177Srrsprocess_header(int idx, char *p) 1612277177Srrs{ 1613277177Srrs struct counters *up; 1614277177Srrs int i, len, nlen; 1615277177Srrs /* 1616277177Srrs * Given header element idx, at p in 1617277177Srrs * form 's/NN/nameof' 1618277177Srrs * process the entry to pull out the name and 1619277177Srrs * the CPU number. 1620277177Srrs */ 1621277177Srrs if (strncmp(p, "s/", 2)) { 1622277177Srrs printf("Check -- invalid header no s/ in %s\n", 1623277177Srrs p); 1624277177Srrs return; 1625277177Srrs } 1626277177Srrs up = &cnts[idx]; 1627277177Srrs up->cpu = strtol(&p[2], NULL, 10); 1628277177Srrs len = strlen(p); 1629277177Srrs for (i=2; i<len; i++) { 1630277177Srrs if (p[i] == '/') { 1631277177Srrs nlen = strlen(&p[(i+1)]); 1632277177Srrs if (nlen < (MAX_NLEN-1)) { 1633277177Srrs strcpy(up->counter_name, &p[(i+1)]); 1634277177Srrs } else { 1635277177Srrs strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 1636277177Srrs } 1637277177Srrs } 1638277177Srrs } 1639277177Srrs} 1640277177Srrs 1641277177Srrsstatic void 1642277177Srrsbuild_counters_from_header(FILE *io) 1643277177Srrs{ 1644277177Srrs char buffer[8192], *p; 1645277177Srrs int i, len, cnt; 1646277177Srrs size_t mlen; 1647277177Srrs 1648277177Srrs /* We have a new start, lets 1649277177Srrs * setup our headers and cpus. 1650277177Srrs */ 1651277177Srrs if (fgets(buffer, sizeof(buffer), io) == NULL) { 1652277177Srrs printf("First line can't be read from file err:%d\n", errno); 1653277177Srrs return; 1654277177Srrs } 1655277177Srrs /* 1656277177Srrs * Ok output is an array of counters. Once 1657277177Srrs * we start to read the values in we must 1658277177Srrs * put them in there slot to match there CPU and 1659277177Srrs * counter being updated. We create a mass array 1660277177Srrs * of the counters, filling in the CPU and 1661277177Srrs * counter name. 1662277177Srrs */ 1663277177Srrs /* How many do we get? */ 1664277177Srrs len = strlen(buffer); 1665277177Srrs for (i=0, cnt=0; i<len; i++) { 1666277177Srrs if (strncmp(&buffer[i], "s/", 2) == 0) { 1667277177Srrs cnt++; 1668277177Srrs for(;i<len;i++) { 1669277177Srrs if (buffer[i] == ' ') 1670277177Srrs break; 1671277177Srrs } 1672277177Srrs } 1673277177Srrs } 1674277177Srrs mlen = sizeof(struct counters) * cnt; 1675277177Srrs cnts = malloc(mlen); 1676277177Srrs ncnts = cnt; 1677277177Srrs if (cnts == NULL) { 1678277177Srrs printf("No memory err:%d\n", errno); 1679277177Srrs return; 1680277177Srrs } 1681277177Srrs memset(cnts, 0, mlen); 1682277177Srrs for (i=0, cnt=0; i<len; i++) { 1683277177Srrs if (strncmp(&buffer[i], "s/", 2) == 0) { 1684277177Srrs p = &buffer[i]; 1685277177Srrs for(;i<len;i++) { 1686277177Srrs if (buffer[i] == ' ') { 1687277177Srrs buffer[i] = 0; 1688277177Srrs break; 1689277177Srrs } 1690277177Srrs } 1691277177Srrs process_header(cnt, p); 1692277177Srrs cnt++; 1693277177Srrs } 1694277177Srrs } 1695277177Srrs if (verbose) 1696277177Srrs printf("We have %d entries\n", cnt); 1697277177Srrs} 1698277177Srrsextern int max_to_collect; 1699277177Srrsint max_to_collect = MAX_COUNTER_SLOTS; 1700277177Srrs 1701277177Srrsstatic int 1702277177Srrsread_a_line(FILE *io) 1703277177Srrs{ 1704277177Srrs char buffer[8192], *p, *stop; 1705277177Srrs int pos, i; 1706277177Srrs 1707277177Srrs if (fgets(buffer, sizeof(buffer), io) == NULL) { 1708277177Srrs return(0); 1709277177Srrs } 1710277177Srrs p = buffer; 1711277177Srrs for (i=0; i<ncnts; i++) { 1712277177Srrs pos = cnts[i].pos; 1713277177Srrs cnts[i].vals[pos] = strtol(p, &stop, 0); 1714277177Srrs cnts[i].pos++; 1715277177Srrs cnts[i].sum += cnts[i].vals[pos]; 1716277177Srrs p = stop; 1717277177Srrs } 1718277177Srrs return (1); 1719277177Srrs} 1720277177Srrs 1721277177Srrsextern int cpu_count_out; 1722277177Srrsint cpu_count_out=0; 1723277177Srrs 1724277177Srrsstatic void 1725277177Srrsprint_header(void) 1726277177Srrs{ 1727277177Srrs int i, cnt, printed_cnt; 1728277177Srrs 1729277177Srrs printf("*********************************\n"); 1730277177Srrs for(i=0, cnt=0; i<MAX_CPU; i++) { 1731277177Srrs if (glob_cpu[i]) { 1732277177Srrs cnt++; 1733277177Srrs } 1734277177Srrs } 1735277177Srrs cpu_count_out = cnt; 1736277177Srrs for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 1737277177Srrs if (glob_cpu[i]) { 1738277177Srrs printf("CPU%d", i); 1739277177Srrs printed_cnt++; 1740277177Srrs } 1741277177Srrs if (printed_cnt == cnt) { 1742277177Srrs printf("\n"); 1743277177Srrs break; 1744277177Srrs } else { 1745277177Srrs printf("\t"); 1746277177Srrs } 1747277177Srrs } 1748277177Srrs} 1749277177Srrs 1750277177Srrsstatic void 1751277177Srrslace_cpus_together(void) 1752277177Srrs{ 1753277177Srrs int i, j, lace_cpu; 1754277177Srrs struct counters *cpat, *at; 1755277177Srrs 1756277177Srrs for(i=0; i<ncnts; i++) { 1757277177Srrs cpat = &cnts[i]; 1758277177Srrs if (cpat->next_cpu) { 1759277177Srrs /* Already laced in */ 1760277177Srrs continue; 1761277177Srrs } 1762277177Srrs lace_cpu = cpat->cpu; 1763277177Srrs if (lace_cpu >= MAX_CPU) { 1764277177Srrs printf("CPU %d to big\n", lace_cpu); 1765277177Srrs continue; 1766277177Srrs } 1767277177Srrs if (glob_cpu[lace_cpu] == NULL) { 1768277177Srrs glob_cpu[lace_cpu] = cpat; 1769277177Srrs } else { 1770277177Srrs /* Already processed this cpu */ 1771277177Srrs continue; 1772277177Srrs } 1773277177Srrs /* Ok look forward for cpu->cpu and link in */ 1774277177Srrs for(j=(i+1); j<ncnts; j++) { 1775277177Srrs at = &cnts[j]; 1776277177Srrs if (at->next_cpu) { 1777277177Srrs continue; 1778277177Srrs } 1779277177Srrs if (at->cpu == lace_cpu) { 1780277177Srrs /* Found one */ 1781277177Srrs cpat->next_cpu = at; 1782277177Srrs cpat = at; 1783277177Srrs } 1784277177Srrs } 1785277177Srrs } 1786277177Srrs} 1787277177Srrs 1788277177Srrs 1789277177Srrsstatic void 1790277177Srrsprocess_file(char *filename) 1791277177Srrs{ 1792277177Srrs FILE *io; 1793277177Srrs int i; 1794277177Srrs int line_at, not_done; 1795277177Srrs pid_t pid_of_command=0; 1796277177Srrs 1797277177Srrs if (filename == NULL) { 1798277177Srrs io = my_popen(command, "r", &pid_of_command); 1799277177Srrs } else { 1800277177Srrs io = fopen(filename, "r"); 1801277177Srrs if (io == NULL) { 1802277177Srrs printf("Can't process file %s err:%d\n", 1803277177Srrs filename, errno); 1804277177Srrs return; 1805277177Srrs } 1806277177Srrs } 1807277177Srrs build_counters_from_header(io); 1808277177Srrs if (cnts == NULL) { 1809277177Srrs /* Nothing we can do */ 1810277177Srrs printf("Nothing to do -- no counters built\n"); 1811285853Semaste if (io) { 1812285853Semaste fclose(io); 1813285853Semaste } 1814277177Srrs return; 1815277177Srrs } 1816277177Srrs lace_cpus_together(); 1817277177Srrs print_header(); 1818277177Srrs if (verbose) { 1819277177Srrs for (i=0; i<ncnts; i++) { 1820277177Srrs printf("Counter:%s cpu:%d index:%d\n", 1821277177Srrs cnts[i].counter_name, 1822277177Srrs cnts[i].cpu, i); 1823277177Srrs } 1824277177Srrs } 1825277177Srrs line_at = 0; 1826277177Srrs not_done = 1; 1827277177Srrs while(not_done) { 1828277177Srrs if (read_a_line(io)) { 1829277177Srrs line_at++; 1830277177Srrs } else { 1831277177Srrs break; 1832277177Srrs } 1833277177Srrs if (line_at >= max_to_collect) { 1834277177Srrs not_done = 0; 1835277177Srrs } 1836277177Srrs if (filename == NULL) { 1837277177Srrs int cnt; 1838277177Srrs /* For the ones we dynamically open we print now */ 1839277177Srrs for(i=0, cnt=0; i<MAX_CPU; i++) { 1840277177Srrs do_expression(glob_cpu[i], (line_at-1)); 1841277177Srrs cnt++; 1842277177Srrs if (cnt == cpu_count_out) { 1843277177Srrs printf("\n"); 1844277177Srrs break; 1845277177Srrs } else { 1846277177Srrs printf("\t"); 1847277177Srrs } 1848277177Srrs } 1849277177Srrs } 1850277177Srrs } 1851277177Srrs if (filename) { 1852277177Srrs fclose(io); 1853277177Srrs } else { 1854277177Srrs my_pclose(io, pid_of_command); 1855277177Srrs } 1856277177Srrs} 1857277177Srrs#if defined(__amd64__) 1858277177Srrs#define cpuid(in,a,b,c,d)\ 1859277177Srrs asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 1860277177Srrs#else 1861277177Srrs#define cpuid(in, a, b, c, d) 1862277177Srrs#endif 1863277177Srrs 1864277177Srrsstatic void 1865277177Srrsget_cpuid_set(void) 1866277177Srrs{ 1867277177Srrs unsigned long eax, ebx, ecx, edx; 1868277177Srrs int model; 1869277177Srrs pid_t pid_of_command=0; 1870277177Srrs size_t sz, len; 1871277177Srrs FILE *io; 1872277177Srrs char linebuf[1024], *str; 1873277177Srrs 1874277177Srrs eax = ebx = ecx = edx = 0; 1875277177Srrs 1876277177Srrs cpuid(0, eax, ebx, ecx, edx); 1877277177Srrs if (ebx == 0x68747541) { 1878277177Srrs printf("AMD processors are not supported by this program\n"); 1879277177Srrs printf("Sorry\n"); 1880277177Srrs exit(0); 1881277177Srrs } else if (ebx == 0x6972794) { 1882277177Srrs printf("Cyrix processors are not supported by this program\n"); 1883277177Srrs printf("Sorry\n"); 1884277177Srrs exit(0); 1885277177Srrs } else if (ebx == 0x756e6547) { 1886277177Srrs printf("Genuine Intel\n"); 1887277177Srrs } else { 1888277177Srrs printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 1889277177Srrs exit(0); 1890277177Srrs } 1891277177Srrs cpuid(1, eax, ebx, ecx, edx); 1892277177Srrs model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 1893277177Srrs printf("CPU model is 0x%x id:0x%lx\n", model, eax); 1894277177Srrs switch (eax & 0xF00) { 1895277177Srrs case 0x500: /* Pentium family processors */ 1896277177Srrs printf("Intel Pentium P5\n"); 1897277177Srrs goto not_supported; 1898277177Srrs break; 1899277177Srrs case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 1900277177Srrs switch (model) { 1901277177Srrs case 0x1: 1902277177Srrs printf("Intel Pentium P6\n"); 1903277177Srrs goto not_supported; 1904277177Srrs break; 1905277177Srrs case 0x3: 1906277177Srrs case 0x5: 1907277177Srrs printf("Intel PII\n"); 1908277177Srrs goto not_supported; 1909277177Srrs break; 1910277177Srrs case 0x6: case 0x16: 1911277177Srrs printf("Intel CL\n"); 1912277177Srrs goto not_supported; 1913277177Srrs break; 1914277177Srrs case 0x7: case 0x8: case 0xA: case 0xB: 1915277177Srrs printf("Intel PIII\n"); 1916277177Srrs goto not_supported; 1917277177Srrs break; 1918277177Srrs case 0x9: case 0xD: 1919277177Srrs printf("Intel PM\n"); 1920277177Srrs goto not_supported; 1921277177Srrs break; 1922277177Srrs case 0xE: 1923277177Srrs printf("Intel CORE\n"); 1924277177Srrs goto not_supported; 1925277177Srrs break; 1926277177Srrs case 0xF: 1927277177Srrs printf("Intel CORE2\n"); 1928277177Srrs goto not_supported; 1929277177Srrs break; 1930277177Srrs case 0x17: 1931277177Srrs printf("Intel CORE2EXTREME\n"); 1932277177Srrs goto not_supported; 1933277177Srrs break; 1934277177Srrs case 0x1C: /* Per Intel document 320047-002. */ 1935277177Srrs printf("Intel ATOM\n"); 1936277177Srrs goto not_supported; 1937277177Srrs break; 1938277177Srrs case 0x1A: 1939277177Srrs case 0x1E: /* 1940277177Srrs * Per Intel document 253669-032 9/2009, 1941277177Srrs * pages A-2 and A-57 1942277177Srrs */ 1943277177Srrs case 0x1F: /* 1944277177Srrs * Per Intel document 253669-032 9/2009, 1945277177Srrs * pages A-2 and A-57 1946277177Srrs */ 1947277177Srrs printf("Intel COREI7\n"); 1948277177Srrs goto not_supported; 1949277177Srrs break; 1950277177Srrs case 0x2E: 1951277177Srrs printf("Intel NEHALEM\n"); 1952277177Srrs goto not_supported; 1953277177Srrs break; 1954277177Srrs case 0x25: /* Per Intel document 253669-033US 12/2009. */ 1955277177Srrs case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 1956277177Srrs printf("Intel WESTMERE\n"); 1957277177Srrs goto not_supported; 1958277177Srrs break; 1959277177Srrs case 0x2F: /* Westmere-EX, seen in wild */ 1960277177Srrs printf("Intel WESTMERE\n"); 1961277177Srrs goto not_supported; 1962277177Srrs break; 1963277177Srrs case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 1964277177Srrs printf("Intel SANDYBRIDGE\n"); 1965277177Srrs set_sandybridge(); 1966277177Srrs break; 1967277177Srrs case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 1968277177Srrs printf("Intel SANDYBRIDGE_XEON\n"); 1969277177Srrs set_sandybridge(); 1970277177Srrs break; 1971277177Srrs case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 1972277177Srrs printf("Intel IVYBRIDGE\n"); 1973277177Srrs set_ivybridge(); 1974277177Srrs break; 1975277177Srrs case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 1976277177Srrs printf("Intel IVYBRIDGE_XEON\n"); 1977277177Srrs set_ivybridge(); 1978277177Srrs break; 1979277177Srrs case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 1980277177Srrs printf("Intel HASWELL (Xeon)\n"); 1981277177Srrs set_haswell(); 1982277177Srrs break; 1983277177Srrs case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 1984277177Srrs case 0x45: 1985277177Srrs case 0x46: 1986277177Srrs printf("Intel HASWELL\n"); 1987277177Srrs set_haswell(); 1988277177Srrs break; 1989277177Srrs case 0x4D: 1990277177Srrs /* Per Intel document 330061-001 01/2014. */ 1991277177Srrs printf("Intel ATOM_SILVERMONT\n"); 1992277177Srrs goto not_supported; 1993277177Srrs break; 1994277177Srrs default: 1995277177Srrs printf("Intel model 0x%x is not known -- sorry\n", 1996277177Srrs model); 1997277177Srrs goto not_supported; 1998277177Srrs break; 1999277177Srrs } 2000277177Srrs break; 2001277177Srrs case 0xF00: /* P4 */ 2002277177Srrs printf("Intel unknown model %d\n", model); 2003277177Srrs goto not_supported; 2004277177Srrs break; 2005277177Srrs } 2006277177Srrs /* Ok lets load the list of all known PMC's */ 2007277177Srrs io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2008277177Srrs if (valid_pmcs == NULL) { 2009277177Srrs /* Likely */ 2010277177Srrs pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2011277177Srrs sz = sizeof(char *) * pmc_allocated_cnt; 2012277177Srrs valid_pmcs = malloc(sz); 2013277177Srrs if (valid_pmcs == NULL) { 2014277177Srrs printf("No memory allocation fails at startup?\n"); 2015277177Srrs exit(-1); 2016277177Srrs } 2017277177Srrs memset(valid_pmcs, 0, sz); 2018277177Srrs } 2019277177Srrs 2020277177Srrs while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2021277177Srrs if (linebuf[0] != '\t') { 2022277177Srrs /* sometimes headers ;-) */ 2023277177Srrs continue; 2024277177Srrs } 2025277177Srrs len = strlen(linebuf); 2026277177Srrs if (linebuf[(len-1)] == '\n') { 2027277177Srrs /* Likely */ 2028277177Srrs linebuf[(len-1)] = 0; 2029277177Srrs } 2030277177Srrs str = &linebuf[1]; 2031277177Srrs len = strlen(str) + 1; 2032277177Srrs valid_pmcs[valid_pmc_cnt] = malloc(len); 2033277177Srrs if (valid_pmcs[valid_pmc_cnt] == NULL) { 2034277177Srrs printf("No memory2 allocation fails at startup?\n"); 2035277177Srrs exit(-1); 2036277177Srrs } 2037277177Srrs memset(valid_pmcs[valid_pmc_cnt], 0, len); 2038277177Srrs strcpy(valid_pmcs[valid_pmc_cnt], str); 2039277177Srrs valid_pmc_cnt++; 2040277177Srrs if (valid_pmc_cnt >= pmc_allocated_cnt) { 2041277177Srrs /* Got to expand -- unlikely */ 2042277177Srrs char **more; 2043277177Srrs 2044277177Srrs sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2045277177Srrs more = malloc(sz); 2046277177Srrs if (more == NULL) { 2047277177Srrs printf("No memory3 allocation fails at startup?\n"); 2048277177Srrs exit(-1); 2049277177Srrs } 2050285853Semaste memset(more, 0, sz); 2051277177Srrs memcpy(more, valid_pmcs, sz); 2052277177Srrs pmc_allocated_cnt *= 2; 2053277177Srrs free(valid_pmcs); 2054277177Srrs valid_pmcs = more; 2055277177Srrs } 2056277177Srrs } 2057277177Srrs my_pclose(io, pid_of_command); 2058277177Srrs return; 2059277177Srrsnot_supported: 2060277177Srrs printf("Not supported\n"); 2061277177Srrs exit(-1); 2062277177Srrs} 2063277177Srrs 2064277177Srrsstatic void 2065277177Srrsexplain_all(void) 2066277177Srrs{ 2067277177Srrs int i; 2068277177Srrs printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2069277177Srrs printf("-------------------------------------------------------------\n"); 2070277177Srrs for(i=0; i<the_cpu.number; i++){ 2071277177Srrs printf("For -e %s ", the_cpu.ents[i].name); 2072277177Srrs (*the_cpu.explain)(the_cpu.ents[i].name); 2073277177Srrs printf("----------------------------\n"); 2074277177Srrs } 2075277177Srrs} 2076277177Srrs 2077277177Srrsstatic void 2078277177Srrstest_for_a_pmc(const char *pmc, int out_so_far) 2079277177Srrs{ 2080277177Srrs FILE *io; 2081277177Srrs pid_t pid_of_command=0; 2082277177Srrs char my_command[1024]; 2083277177Srrs char line[1024]; 2084277177Srrs char resp[1024]; 2085277177Srrs int len, llen, i; 2086277177Srrs 2087277177Srrs if (out_so_far < 50) { 2088277177Srrs len = 50 - out_so_far; 2089277177Srrs for(i=0; i<len; i++) { 2090277177Srrs printf(" "); 2091277177Srrs } 2092277177Srrs } 2093277177Srrs sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2094277177Srrs io = my_popen(my_command, "r", &pid_of_command); 2095277177Srrs if (io == NULL) { 2096277177Srrs printf("Failed -- popen fails\n"); 2097277177Srrs return; 2098277177Srrs } 2099277177Srrs /* Setup what we expect */ 2100277177Srrs len = sprintf(resp, "%s", pmc); 2101277177Srrs if (fgets(line, sizeof(line), io) == NULL) { 2102277177Srrs printf("Failed -- no output from pmstat\n"); 2103277177Srrs goto out; 2104277177Srrs } 2105277177Srrs llen = strlen(line); 2106277177Srrs if (line[(llen-1)] == '\n') { 2107277177Srrs line[(llen-1)] = 0; 2108277177Srrs llen--; 2109277177Srrs } 2110277177Srrs for(i=2; i<(llen-len); i++) { 2111277177Srrs if (strncmp(&line[i], "ERROR", 5) == 0) { 2112277177Srrs printf("Failed %s\n", line); 2113277177Srrs goto out; 2114277177Srrs } else if (strncmp(&line[i], resp, len) == 0) { 2115277177Srrs int j, k; 2116277177Srrs 2117277177Srrs if (fgets(line, sizeof(line), io) == NULL) { 2118277177Srrs printf("Failed -- no second output from pmstat\n"); 2119277177Srrs goto out; 2120277177Srrs } 2121277177Srrs len = strlen(line); 2122277177Srrs for (j=0; j<len; j++) { 2123277177Srrs if (line[j] == ' ') { 2124277177Srrs j++; 2125277177Srrs } else { 2126277177Srrs break; 2127277177Srrs } 2128277177Srrs } 2129277177Srrs printf("Pass"); 2130277177Srrs len = strlen(&line[j]); 2131277177Srrs if (len < 20) { 2132277177Srrs for(k=0; k<(20-len); k++) { 2133277177Srrs printf(" "); 2134277177Srrs } 2135277177Srrs } 2136281235Srrs if (len) { 2137281235Srrs printf("%s", &line[j]); 2138281235Srrs } else { 2139281235Srrs printf("\n"); 2140281235Srrs } 2141277177Srrs goto out; 2142277177Srrs } 2143277177Srrs } 2144277177Srrs printf("Failed -- '%s' not '%s'\n", line, resp); 2145277177Srrsout: 2146277177Srrs my_pclose(io, pid_of_command); 2147277177Srrs 2148277177Srrs} 2149277177Srrs 2150277177Srrsstatic int 2151277177Srrsadd_it_to(char **vars, int cur_cnt, char *name) 2152277177Srrs{ 2153277177Srrs int i; 2154277177Srrs size_t len; 2155277177Srrs for(i=0; i<cur_cnt; i++) { 2156277177Srrs if (strcmp(vars[i], name) == 0) { 2157277177Srrs /* Already have */ 2158277177Srrs return(0); 2159277177Srrs } 2160277177Srrs } 2161277177Srrs if (vars[cur_cnt] != NULL) { 2162277177Srrs printf("Cur_cnt:%d filled with %s??\n", 2163277177Srrs cur_cnt, vars[cur_cnt]); 2164277177Srrs exit(-1); 2165277177Srrs } 2166277177Srrs /* Ok its new */ 2167277177Srrs len = strlen(name) + 1; 2168277177Srrs vars[cur_cnt] = malloc(len); 2169277177Srrs if (vars[cur_cnt] == NULL) { 2170277177Srrs printf("No memory %s\n", __FUNCTION__); 2171277177Srrs exit(-1); 2172277177Srrs } 2173277177Srrs memset(vars[cur_cnt], 0, len); 2174277177Srrs strcpy(vars[cur_cnt], name); 2175277177Srrs return(1); 2176277177Srrs} 2177277177Srrs 2178277177Srrsstatic char * 2179277177Srrsbuild_command_for_exp(struct expression *exp) 2180277177Srrs{ 2181277177Srrs /* 2182277177Srrs * Build the pmcstat command to handle 2183277177Srrs * the passed in expression. 2184277177Srrs * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2185277177Srrs * where NNN and QQQ represent the PMC's in the expression 2186277177Srrs * uniquely.. 2187277177Srrs */ 2188277177Srrs char forming[1024]; 2189277177Srrs int cnt_pmc, alloced_pmcs, i; 2190277177Srrs struct expression *at; 2191277177Srrs char **vars, *cmd; 2192277177Srrs size_t mal; 2193277177Srrs 2194277177Srrs alloced_pmcs = cnt_pmc = 0; 2195277177Srrs /* first how many do we have */ 2196277177Srrs at = exp; 2197277177Srrs while (at) { 2198277177Srrs if (at->type == TYPE_VALUE_PMC) { 2199277177Srrs cnt_pmc++; 2200277177Srrs } 2201277177Srrs at = at->next; 2202277177Srrs } 2203277177Srrs if (cnt_pmc == 0) { 2204277177Srrs printf("No PMC's in your expression -- nothing to do!!\n"); 2205277177Srrs exit(0); 2206277177Srrs } 2207277177Srrs mal = cnt_pmc * sizeof(char *); 2208277177Srrs vars = malloc(mal); 2209277177Srrs if (vars == NULL) { 2210277177Srrs printf("No memory\n"); 2211277177Srrs exit(-1); 2212277177Srrs } 2213277177Srrs memset(vars, 0, mal); 2214277177Srrs at = exp; 2215277177Srrs while (at) { 2216277177Srrs if (at->type == TYPE_VALUE_PMC) { 2217277177Srrs if(add_it_to(vars, alloced_pmcs, at->name)) { 2218277177Srrs alloced_pmcs++; 2219277177Srrs } 2220277177Srrs } 2221277177Srrs at = at->next; 2222277177Srrs } 2223277177Srrs /* Now we have a unique list in vars so create our command */ 2224277177Srrs mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2225277177Srrs for(i=0; i<alloced_pmcs; i++) { 2226277177Srrs mal += strlen(vars[i]) + 4; /* var + " -s " */ 2227277177Srrs } 2228277177Srrs cmd = malloc((mal+2)); 2229277177Srrs if (cmd == NULL) { 2230277177Srrs printf("%s out of mem\n", __FUNCTION__); 2231277177Srrs exit(-1); 2232277177Srrs } 2233277177Srrs memset(cmd, 0, (mal+2)); 2234277177Srrs strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2235277177Srrs at = exp; 2236277177Srrs for(i=0; i<alloced_pmcs; i++) { 2237277177Srrs sprintf(forming, " -s %s", vars[i]); 2238277177Srrs strcat(cmd, forming); 2239277177Srrs free(vars[i]); 2240277177Srrs vars[i] = NULL; 2241277177Srrs } 2242277177Srrs free(vars); 2243277177Srrs return(cmd); 2244277177Srrs} 2245277177Srrs 2246277177Srrsstatic int 2247277177Srrsuser_expr(struct counters *cpu, int pos) 2248277177Srrs{ 2249277177Srrs int ret; 2250277177Srrs double res; 2251277177Srrs struct counters *var; 2252277177Srrs struct expression *at; 2253277177Srrs 2254277177Srrs at = master_exp; 2255277177Srrs while (at) { 2256277177Srrs if (at->type == TYPE_VALUE_PMC) { 2257277177Srrs var = find_counter(cpu, at->name); 2258277177Srrs if (var == NULL) { 2259277177Srrs printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2260277177Srrs exit(-1); 2261277177Srrs } 2262277177Srrs if (pos != -1) { 2263277177Srrs at->value = var->vals[pos] * 1.0; 2264277177Srrs } else { 2265277177Srrs at->value = var->sum * 1.0; 2266277177Srrs } 2267277177Srrs } 2268277177Srrs at = at->next; 2269277177Srrs } 2270277177Srrs res = run_expr(master_exp, 1, NULL); 2271277177Srrs ret = printf("%1.3f", res); 2272277177Srrs return(ret); 2273277177Srrs} 2274277177Srrs 2275277177Srrs 2276277177Srrsstatic void 2277277177Srrsset_manual_exp(struct expression *exp) 2278277177Srrs{ 2279277177Srrs expression = user_expr; 2280277177Srrs command = build_command_for_exp(exp); 2281277177Srrs threshold = "User defined threshold"; 2282277177Srrs} 2283277177Srrs 2284277177Srrsstatic void 2285277177Srrsrun_tests(void) 2286277177Srrs{ 2287277177Srrs int i, lenout; 2288277177Srrs printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2289277177Srrs printf("------------------------------------------------------------------------\n"); 2290277177Srrs for(i=0; i<valid_pmc_cnt; i++) { 2291277177Srrs lenout = printf("%s", valid_pmcs[i]); 2292277177Srrs fflush(stdout); 2293277177Srrs test_for_a_pmc(valid_pmcs[i], lenout); 2294277177Srrs } 2295277177Srrs} 2296277177Srrsstatic void 2297277177Srrslist_all(void) 2298277177Srrs{ 2299277177Srrs int i, cnt, j; 2300277177Srrs printf("PMC Abbreviation\n"); 2301277177Srrs printf("--------------------------------------------------------------\n"); 2302277177Srrs for(i=0; i<valid_pmc_cnt; i++) { 2303277177Srrs cnt = printf("%s", valid_pmcs[i]); 2304277177Srrs for(j=cnt; j<52; j++) { 2305277177Srrs printf(" "); 2306277177Srrs } 2307277177Srrs printf("%%%d\n", i); 2308277177Srrs } 2309277177Srrs} 2310277177Srrs 2311277177Srrs 2312277177Srrsint 2313277177Srrsmain(int argc, char **argv) 2314277177Srrs{ 2315277177Srrs int i, j, cnt; 2316277177Srrs char *filename=NULL; 2317277177Srrs char *name=NULL; 2318277177Srrs int help_only = 0; 2319277177Srrs int test_mode = 0; 2320277177Srrs 2321277177Srrs get_cpuid_set(); 2322277177Srrs memset(glob_cpu, 0, sizeof(glob_cpu)); 2323277177Srrs while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) { 2324277177Srrs switch (i) { 2325277177Srrs case 'L': 2326277177Srrs list_all(); 2327277177Srrs return(0); 2328277177Srrs case 'H': 2329277177Srrs printf("**********************************\n"); 2330277177Srrs explain_all(); 2331277177Srrs printf("**********************************\n"); 2332277177Srrs return(0); 2333277177Srrs break; 2334277177Srrs case 'T': 2335277177Srrs test_mode = 1; 2336277177Srrs break; 2337277177Srrs case 'E': 2338277177Srrs master_exp = parse_expression(optarg); 2339277177Srrs if (master_exp) { 2340277177Srrs set_manual_exp(master_exp); 2341277177Srrs } 2342277177Srrs break; 2343277177Srrs case 'e': 2344277177Srrs if (validate_expression(optarg)) { 2345277177Srrs printf("Unknown expression %s\n", optarg); 2346277177Srrs return(0); 2347277177Srrs } 2348277177Srrs name = optarg; 2349277177Srrs set_expression(optarg); 2350277177Srrs break; 2351277177Srrs case 'm': 2352277177Srrs max_to_collect = strtol(optarg, NULL, 0); 2353277177Srrs if (max_to_collect > MAX_COUNTER_SLOTS) { 2354277177Srrs /* You can't collect more than max in array */ 2355277177Srrs max_to_collect = MAX_COUNTER_SLOTS; 2356277177Srrs } 2357277177Srrs break; 2358277177Srrs case 'v': 2359277177Srrs verbose++; 2360277177Srrs break; 2361277177Srrs case 'h': 2362277177Srrs help_only = 1; 2363277177Srrs break; 2364277177Srrs case 'i': 2365277177Srrs filename = optarg; 2366277177Srrs break; 2367277177Srrs case '?': 2368277177Srrs default: 2369277177Srrs use: 2370277177Srrs printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2371277177Srrs argv[0]); 2372277177Srrs printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2373277177Srrs printf("-v -- verbose dump debug type things -- you don't want this\n"); 2374277177Srrs printf("-m N -- maximum to collect is N measurments\n"); 2375277177Srrs printf("-e expr-name -- Do expression expr-name\n"); 2376277177Srrs printf("-E 'your expression' -- Do your expression\n"); 2377277177Srrs printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2378277177Srrs printf("-H -- Don't run anything, just explain all canned expressions\n"); 2379277177Srrs printf("-T -- Test all PMC's defined by this processor\n"); 2380277177Srrs return(0); 2381277177Srrs break; 2382277177Srrs }; 2383277177Srrs } 2384277177Srrs if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) { 2385277177Srrs printf("Without setting an expression we cannot dynamically gather information\n"); 2386277177Srrs printf("you must supply a filename (and you probably want verbosity)\n"); 2387277177Srrs goto use; 2388277177Srrs } 2389277177Srrs if (test_mode) { 2390277177Srrs run_tests(); 2391277177Srrs return(0); 2392277177Srrs } 2393277177Srrs printf("*********************************\n"); 2394277177Srrs if (master_exp == NULL) { 2395277177Srrs (*the_cpu.explain)(name); 2396277177Srrs } else { 2397277177Srrs printf("Examine your expression "); 2398277177Srrs print_exp(master_exp); 2399277177Srrs printf("User defined threshold\n"); 2400277177Srrs } 2401277177Srrs if (help_only) { 2402277177Srrs return(0); 2403277177Srrs } 2404277177Srrs process_file(filename); 2405277177Srrs if (verbose >= 2) { 2406277177Srrs for (i=0; i<ncnts; i++) { 2407277177Srrs printf("Counter:%s cpu:%d index:%d\n", 2408277177Srrs cnts[i].counter_name, 2409277177Srrs cnts[i].cpu, i); 2410277177Srrs for(j=0; j<cnts[i].pos; j++) { 2411277177Srrs printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2412277177Srrs } 2413277177Srrs printf(" sum - %ld\n", (long int)cnts[i].sum); 2414277177Srrs } 2415277177Srrs } 2416277177Srrs if (expression == NULL) { 2417277177Srrs return(0); 2418277177Srrs } 2419277177Srrs for(i=0, cnt=0; i<MAX_CPU; i++) { 2420277177Srrs if (glob_cpu[i]) { 2421277177Srrs do_expression(glob_cpu[i], -1); 2422277177Srrs cnt++; 2423277177Srrs if (cnt == cpu_count_out) { 2424277177Srrs printf("\n"); 2425277177Srrs break; 2426277177Srrs } else { 2427277177Srrs printf("\t"); 2428277177Srrs } 2429277177Srrs } 2430277177Srrs } 2431277177Srrs return(0); 2432277177Srrs} 2433