1277177Srrs/*- 2277177Srrs * Copyright (c) 2014, 2015 Netflix Inc. 3277177Srrs * All rights reserved. 4277177Srrs * 5277177Srrs * Redistribution and use in source and binary forms, with or without 6277177Srrs * modification, are permitted provided that the following conditions 7277177Srrs * are met: 8277177Srrs * 1. Redistributions of source code must retain the above copyright 9277177Srrs * notice, this list of conditions and the following disclaimer, 10277177Srrs * in this position and unchanged. 11277177Srrs * 2. Redistributions in binary form must reproduce the above copyright 12277177Srrs * notice, this list of conditions and the following disclaimer in the 13277177Srrs * documentation and/or other materials provided with the distribution. 14277177Srrs * 3. The name of the author may not be used to endorse or promote products 15277177Srrs * derived from this software without specific prior written permission 16277177Srrs * 17277177Srrs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18277177Srrs * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19277177Srrs * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20277177Srrs * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21277177Srrs * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22277177Srrs * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23277177Srrs * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24277177Srrs * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25277177Srrs * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26277177Srrs * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27277177Srrs */ 28277177Srrs#include <sys/types.h> 29277177Srrs#include <stdio.h> 30277177Srrs#include <stdlib.h> 31277177Srrs#include <unistd.h> 32277177Srrs#include <string.h> 33277177Srrs#include <strings.h> 34277177Srrs#include <sys/errno.h> 35277177Srrs#include <signal.h> 36277177Srrs#include <sys/wait.h> 37277177Srrs#include <getopt.h> 38277177Srrs#include "eval_expr.h" 39277177Srrs__FBSDID("$FreeBSD: releng/11.0/usr.sbin/pmcstudy/pmcstudy.c 303675 2016-08-02 20:18:43Z bdrewery $"); 40277177Srrs 41292043Srrsstatic int max_pmc_counters = 1; 42292043Srrsstatic int run_all = 0; 43292043Srrs 44277177Srrs#define MAX_COUNTER_SLOTS 1024 45277177Srrs#define MAX_NLEN 64 46277177Srrs#define MAX_CPU 64 47277177Srrsstatic int verbose = 0; 48277177Srrs 49277177Srrsextern char **environ; 50277177Srrsextern struct expression *master_exp; 51292043Srrsstruct expression *master_exp=NULL; 52277177Srrs 53277177Srrs#define PMC_INITIAL_ALLOC 512 54277177Srrsextern char **valid_pmcs; 55277177Srrschar **valid_pmcs = NULL; 56277177Srrsextern int valid_pmc_cnt; 57292043Srrsint valid_pmc_cnt=0; 58277177Srrsextern int pmc_allocated_cnt; 59292043Srrsint pmc_allocated_cnt=0; 60277177Srrs 61277177Srrs/* 62277177Srrs * The following two varients on popen and pclose with 63277177Srrs * the cavet that they get you the PID so that you 64292043Srrs * can supply it to pclose so it can send a SIGTERM 65277177Srrs * to the process. 66277177Srrs */ 67277177Srrsstatic FILE * 68277177Srrsmy_popen(const char *command, const char *dir, pid_t *p_pid) 69277177Srrs{ 70277177Srrs FILE *io_out, *io_in; 71277177Srrs int pdesin[2], pdesout[2]; 72277177Srrs char *argv[4]; 73277177Srrs pid_t pid; 74277177Srrs char cmd[4]; 75277177Srrs char cmd2[1024]; 76277177Srrs char arg1[4]; 77277177Srrs 78277177Srrs if ((strcmp(dir, "r") != 0) && 79277177Srrs (strcmp(dir, "w") != 0)) { 80277177Srrs errno = EINVAL; 81292043Srrs return(NULL); 82277177Srrs } 83277177Srrs if (pipe(pdesin) < 0) 84277177Srrs return (NULL); 85277177Srrs 86277177Srrs if (pipe(pdesout) < 0) { 87277177Srrs (void)close(pdesin[0]); 88277177Srrs (void)close(pdesin[1]); 89277177Srrs return (NULL); 90277177Srrs } 91277177Srrs strcpy(cmd, "sh"); 92277177Srrs strcpy(arg1, "-c"); 93277177Srrs strcpy(cmd2, command); 94277177Srrs argv[0] = cmd; 95277177Srrs argv[1] = arg1; 96277177Srrs argv[2] = cmd2; 97277177Srrs argv[3] = NULL; 98277177Srrs 99277177Srrs switch (pid = fork()) { 100292043Srrs case -1: /* Error. */ 101277177Srrs (void)close(pdesin[0]); 102277177Srrs (void)close(pdesin[1]); 103277177Srrs (void)close(pdesout[0]); 104277177Srrs (void)close(pdesout[1]); 105277177Srrs return (NULL); 106277177Srrs /* NOTREACHED */ 107292043Srrs case 0: /* Child. */ 108277177Srrs /* Close out un-used sides */ 109277177Srrs (void)close(pdesin[1]); 110277177Srrs (void)close(pdesout[0]); 111277177Srrs /* Now prepare the stdin of the process */ 112277177Srrs close(0); 113277177Srrs (void)dup(pdesin[0]); 114277177Srrs (void)close(pdesin[0]); 115277177Srrs /* Now prepare the stdout of the process */ 116277177Srrs close(1); 117277177Srrs (void)dup(pdesout[1]); 118277177Srrs /* And lets do stderr just in case */ 119277177Srrs close(2); 120277177Srrs (void)dup(pdesout[1]); 121277177Srrs (void)close(pdesout[1]); 122277177Srrs /* Now run it */ 123277177Srrs execve("/bin/sh", argv, environ); 124277177Srrs exit(127); 125277177Srrs /* NOTREACHED */ 126277177Srrs } 127277177Srrs /* Parent; assume fdopen can't fail. */ 128277177Srrs /* Store the pid */ 129277177Srrs *p_pid = pid; 130277177Srrs if (strcmp(dir, "r") != 0) { 131277177Srrs io_out = fdopen(pdesin[1], "w"); 132277177Srrs (void)close(pdesin[0]); 133277177Srrs (void)close(pdesout[0]); 134277177Srrs (void)close(pdesout[1]); 135292043Srrs return(io_out); 136292043Srrs } else { 137277177Srrs /* Prepare the input stream */ 138277177Srrs io_in = fdopen(pdesout[0], "r"); 139277177Srrs (void)close(pdesout[1]); 140277177Srrs (void)close(pdesin[0]); 141277177Srrs (void)close(pdesin[1]); 142277177Srrs return (io_in); 143277177Srrs } 144277177Srrs} 145277177Srrs 146277177Srrs/* 147277177Srrs * pclose -- 148277177Srrs * Pclose returns -1 if stream is not associated with a `popened' command, 149277177Srrs * if already `pclosed', or waitpid returns an error. 150277177Srrs */ 151277177Srrsstatic void 152292043Srrsmy_pclose(FILE *io, pid_t the_pid) 153277177Srrs{ 154277177Srrs int pstat; 155277177Srrs pid_t pid; 156277177Srrs 157277177Srrs /* 158277177Srrs * Find the appropriate file pointer and remove it from the list. 159277177Srrs */ 160277177Srrs (void)fclose(io); 161277177Srrs /* Die if you are not dead! */ 162277177Srrs kill(the_pid, SIGTERM); 163277177Srrs do { 164277177Srrs pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); 165277177Srrs } while (pid == -1 && errno == EINTR); 166277177Srrs} 167277177Srrs 168277177Srrsstruct counters { 169277177Srrs struct counters *next_cpu; 170292043Srrs char counter_name[MAX_NLEN]; /* Name of counter */ 171292043Srrs int cpu; /* CPU we are on */ 172292043Srrs int pos; /* Index we are filling to. */ 173277177Srrs uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ 174292043Srrs uint64_t sum; /* Summary of entries */ 175277177Srrs}; 176277177Srrs 177277177Srrsextern struct counters *glob_cpu[MAX_CPU]; 178277177Srrsstruct counters *glob_cpu[MAX_CPU]; 179277177Srrs 180277177Srrsextern struct counters *cnts; 181292043Srrsstruct counters *cnts=NULL; 182277177Srrs 183277177Srrsextern int ncnts; 184292043Srrsint ncnts=0; 185277177Srrs 186292043Srrsextern int (*expression)(struct counters *, int); 187292043Srrsint (*expression)(struct counters *, int); 188277177Srrs 189292043Srrsstatic const char *threshold=NULL; 190277177Srrsstatic const char *command; 191277177Srrs 192277177Srrsstruct cpu_entry { 193277177Srrs const char *name; 194277177Srrs const char *thresh; 195277177Srrs const char *command; 196292043Srrs int (*func)(struct counters *, int); 197292043Srrs int counters_required; 198277177Srrs}; 199277177Srrs 200277177Srrsstruct cpu_type { 201277177Srrs char cputype[32]; 202277177Srrs int number; 203277177Srrs struct cpu_entry *ents; 204292043Srrs void (*explain)(const char *name); 205277177Srrs}; 206277177Srrsextern struct cpu_type the_cpu; 207277177Srrsstruct cpu_type the_cpu; 208277177Srrs 209277177Srrsstatic void 210277177Srrsexplain_name_sb(const char *name) 211277177Srrs{ 212277177Srrs const char *mythresh; 213277177Srrs if (strcmp(name, "allocstall1") == 0) { 214277177Srrs printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); 215277177Srrs mythresh = "thresh > .05"; 216277177Srrs } else if (strcmp(name, "allocstall2") == 0) { 217277177Srrs printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); 218277177Srrs mythresh = "thresh > .05"; 219277177Srrs } else if (strcmp(name, "br_miss") == 0) { 220277177Srrs printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); 221277177Srrs mythresh = "thresh >= .2"; 222277177Srrs } else if (strcmp(name, "splitload") == 0) { 223292043Srrs printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 224277177Srrs mythresh = "thresh >= .1"; 225277177Srrs } else if (strcmp(name, "splitstore") == 0) { 226292043Srrs printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 227277177Srrs mythresh = "thresh >= .01"; 228277177Srrs } else if (strcmp(name, "contested") == 0) { 229277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 230277177Srrs mythresh = "thresh >= .05"; 231277177Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 232277177Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 233277177Srrs mythresh = "thresh >= .05"; 234277177Srrs } else if (strcmp(name, "cache2") == 0) { 235277177Srrs printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); 236277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); 237277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); 238277177Srrs printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); 239277177Srrs mythresh = "thresh >= .2"; 240277177Srrs } else if (strcmp(name, "cache1") == 0) { 241277177Srrs printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 242277177Srrs mythresh = "thresh >= .2"; 243277177Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 244277177Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 245277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 246277177Srrs mythresh = "thresh >= .1"; 247277177Srrs } else if (strcmp(name, "frontendstall") == 0) { 248277177Srrs printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 249277177Srrs mythresh = "thresh >= .15"; 250277177Srrs } else if (strcmp(name, "clears") == 0) { 251277177Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 252277177Srrs printf(" MACHINE_CLEARS.SMC + \n"); 253277177Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 254277177Srrs mythresh = "thresh >= .02"; 255277177Srrs } else if (strcmp(name, "microassist") == 0) { 256277177Srrs printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); 257277177Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 258277177Srrs mythresh = "thresh >= .05"; 259277177Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 260277177Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 261277177Srrs mythresh = "thresh >= .1"; 262277177Srrs } else if (strcmp(name, "fpassist") == 0) { 263277177Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 264277177Srrs mythresh = "look for a excessive value"; 265277177Srrs } else if (strcmp(name, "otherassistavx") == 0) { 266277177Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 267277177Srrs mythresh = "look for a excessive value"; 268277177Srrs } else if (strcmp(name, "otherassistsse") == 0) { 269277177Srrs printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 270277177Srrs mythresh = "look for a excessive value"; 271277177Srrs } else if (strcmp(name, "eff1") == 0) { 272277177Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 273277177Srrs mythresh = "thresh < .9"; 274277177Srrs } else if (strcmp(name, "eff2") == 0) { 275277177Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 276277177Srrs mythresh = "thresh > 1.0"; 277277177Srrs } else if (strcmp(name, "dtlbmissstore") == 0) { 278277177Srrs printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 279277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 280277177Srrs mythresh = "thresh >= .05"; 281277177Srrs } else { 282277177Srrs printf("Unknown name:%s\n", name); 283277177Srrs mythresh = "unknown entry"; 284292043Srrs } 285277177Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 286277177Srrs} 287277177Srrs 288277177Srrsstatic void 289277177Srrsexplain_name_ib(const char *name) 290277177Srrs{ 291277177Srrs const char *mythresh; 292277177Srrs if (strcmp(name, "br_miss") == 0) { 293277177Srrs printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); 294277177Srrs printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); 295277177Srrs printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); 296277177Srrs mythresh = "thresh >= .2"; 297277177Srrs } else if (strcmp(name, "eff1") == 0) { 298277177Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 299277177Srrs mythresh = "thresh < .9"; 300277177Srrs } else if (strcmp(name, "eff2") == 0) { 301277177Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 302277177Srrs mythresh = "thresh > 1.0"; 303277177Srrs } else if (strcmp(name, "cache1") == 0) { 304277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 305277177Srrs mythresh = "thresh >= .2"; 306277177Srrs } else if (strcmp(name, "cache2") == 0) { 307277177Srrs printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); 308277177Srrs mythresh = "thresh >= .2"; 309277177Srrs } else if (strcmp(name, "itlbmiss") == 0) { 310277177Srrs printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 311292043Srrs mythresh = "thresh > .05"; 312277177Srrs } else if (strcmp(name, "icachemiss") == 0) { 313277177Srrs printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 314277177Srrs mythresh = "thresh > .05"; 315277177Srrs } else if (strcmp(name, "lcpstall") == 0) { 316277177Srrs printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 317277177Srrs mythresh = "thresh > .05"; 318277177Srrs } else if (strcmp(name, "datashare") == 0) { 319277177Srrs printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); 320277177Srrs mythresh = "thresh > .05"; 321277177Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 322277177Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 323277177Srrs mythresh = "thresh >= .05"; 324277177Srrs } else if (strcmp(name, "splitload") == 0) { 325277177Srrs printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); 326277177Srrs printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); 327277177Srrs mythresh = "thresh >= .1"; 328277177Srrs } else if (strcmp(name, "splitstore") == 0) { 329292043Srrs printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 330277177Srrs mythresh = "thresh >= .01"; 331277177Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 332277177Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 333277177Srrs mythresh = "thresh >= .1"; 334277177Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 335277177Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 336277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 337277177Srrs mythresh = "thresh >= .1"; 338277177Srrs } else if (strcmp(name, "dtlbmissstore") == 0) { 339277177Srrs printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); 340277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 341277177Srrs mythresh = "thresh >= .05"; 342277177Srrs } else if (strcmp(name, "contested") == 0) { 343277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); 344277177Srrs mythresh = "thresh >= .05"; 345277177Srrs } else if (strcmp(name, "clears") == 0) { 346277177Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 347277177Srrs printf(" MACHINE_CLEARS.SMC + \n"); 348277177Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 349277177Srrs mythresh = "thresh >= .02"; 350277177Srrs } else if (strcmp(name, "microassist") == 0) { 351277177Srrs printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 352277177Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 353277177Srrs mythresh = "thresh >= .05"; 354277177Srrs } else if (strcmp(name, "fpassist") == 0) { 355277177Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 356277177Srrs mythresh = "look for a excessive value"; 357277177Srrs } else if (strcmp(name, "otherassistavx") == 0) { 358277177Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 359277177Srrs mythresh = "look for a excessive value"; 360277177Srrs } else if (strcmp(name, "otherassistsse") == 0) { 361277177Srrs printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 362277177Srrs mythresh = "look for a excessive value"; 363277177Srrs } else { 364277177Srrs printf("Unknown name:%s\n", name); 365277177Srrs mythresh = "unknown entry"; 366292043Srrs } 367277177Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 368277177Srrs} 369277177Srrs 370277177Srrs 371277177Srrsstatic void 372277177Srrsexplain_name_has(const char *name) 373277177Srrs{ 374277177Srrs const char *mythresh; 375277177Srrs if (strcmp(name, "eff1") == 0) { 376277177Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 377277177Srrs mythresh = "thresh < .75"; 378277177Srrs } else if (strcmp(name, "eff2") == 0) { 379277177Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 380277177Srrs mythresh = "thresh > 1.0"; 381277177Srrs } else if (strcmp(name, "itlbmiss") == 0) { 382277177Srrs printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); 383292043Srrs mythresh = "thresh > .05"; 384277177Srrs } else if (strcmp(name, "icachemiss") == 0) { 385277177Srrs printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); 386277177Srrs mythresh = "thresh > .05"; 387277177Srrs } else if (strcmp(name, "lcpstall") == 0) { 388277177Srrs printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 389277177Srrs mythresh = "thresh > .05"; 390277177Srrs } else if (strcmp(name, "cache1") == 0) { 391277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 392277177Srrs mythresh = "thresh >= .2"; 393277177Srrs } else if (strcmp(name, "cache2") == 0) { 394277177Srrs printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); 395277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); 396277177Srrs printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); 397277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); 398277177Srrs mythresh = "thresh >= .2"; 399277177Srrs } else if (strcmp(name, "contested") == 0) { 400277177Srrs printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); 401277177Srrs mythresh = "thresh >= .05"; 402277177Srrs } else if (strcmp(name, "datashare") == 0) { 403277177Srrs printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 404277177Srrs mythresh = "thresh > .05"; 405277177Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 406277177Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 407277177Srrs mythresh = "thresh >= .05"; 408277177Srrs } else if (strcmp(name, "splitload") == 0) { 409292043Srrs printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 410277177Srrs mythresh = "thresh >= .1"; 411277177Srrs } else if (strcmp(name, "splitstore") == 0) { 412292043Srrs printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); 413277177Srrs mythresh = "thresh >= .01"; 414277177Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 415277177Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); 416277177Srrs mythresh = "thresh >= .1"; 417277177Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 418277177Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 419277177Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 420277177Srrs mythresh = "thresh >= .1"; 421277177Srrs } else if (strcmp(name, "br_miss") == 0) { 422277177Srrs printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); 423277177Srrs mythresh = "thresh >= .2"; 424277177Srrs } else if (strcmp(name, "clears") == 0) { 425277177Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 426277177Srrs printf(" MACHINE_CLEARS.SMC + \n"); 427277177Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 428277177Srrs mythresh = "thresh >= .02"; 429277177Srrs } else if (strcmp(name, "microassist") == 0) { 430277177Srrs printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 431277177Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 432277177Srrs mythresh = "thresh >= .05"; 433277177Srrs } else if (strcmp(name, "fpassist") == 0) { 434277177Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 435277177Srrs mythresh = "look for a excessive value"; 436277177Srrs } else if (strcmp(name, "otherassistavx") == 0) { 437277177Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 438277177Srrs mythresh = "look for a excessive value"; 439277177Srrs } else if (strcmp(name, "otherassistsse") == 0) { 440277177Srrs printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 441277177Srrs mythresh = "look for a excessive value"; 442277177Srrs } else { 443277177Srrs printf("Unknown name:%s\n", name); 444277177Srrs mythresh = "unknown entry"; 445292043Srrs } 446277177Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 447277177Srrs} 448277177Srrs 449292043Srrs 450292043Srrs 451277177Srrsstatic struct counters * 452277177Srrsfind_counter(struct counters *base, const char *name) 453277177Srrs{ 454277177Srrs struct counters *at; 455277177Srrs int len; 456277177Srrs 457277177Srrs at = base; 458277177Srrs len = strlen(name); 459292043Srrs while(at) { 460277177Srrs if (strncmp(at->counter_name, name, len) == 0) { 461292043Srrs return(at); 462277177Srrs } 463277177Srrs at = at->next_cpu; 464277177Srrs } 465277177Srrs printf("Can't find counter %s\n", name); 466277177Srrs printf("We have:\n"); 467277177Srrs at = base; 468292043Srrs while(at) { 469277177Srrs printf("- %s\n", at->counter_name); 470277177Srrs at = at->next_cpu; 471277177Srrs } 472277177Srrs exit(-1); 473277177Srrs} 474277177Srrs 475277177Srrsstatic int 476277177Srrsallocstall1(struct counters *cpu, int pos) 477277177Srrs{ 478277177Srrs/* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ 479277177Srrs int ret; 480277177Srrs struct counters *partial; 481277177Srrs struct counters *unhalt; 482277177Srrs double un, par, res; 483277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 484277177Srrs partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); 485277177Srrs if (pos != -1) { 486277177Srrs par = partial->vals[pos] * 1.0; 487277177Srrs un = unhalt->vals[pos] * 1.0; 488277177Srrs } else { 489277177Srrs par = partial->sum * 1.0; 490277177Srrs un = unhalt->sum * 1.0; 491277177Srrs } 492292043Srrs res = par/un; 493277177Srrs ret = printf("%1.3f", res); 494292043Srrs return(ret); 495277177Srrs} 496277177Srrs 497277177Srrsstatic int 498277177Srrsallocstall2(struct counters *cpu, int pos) 499277177Srrs{ 500277177Srrs/* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 501277177Srrs int ret; 502277177Srrs struct counters *partial; 503277177Srrs struct counters *unhalt; 504277177Srrs double un, par, res; 505277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 506277177Srrs partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); 507277177Srrs if (pos != -1) { 508277177Srrs par = partial->vals[pos] * 1.0; 509277177Srrs un = unhalt->vals[pos] * 1.0; 510277177Srrs } else { 511277177Srrs par = partial->sum * 1.0; 512277177Srrs un = unhalt->sum * 1.0; 513277177Srrs } 514292043Srrs res = par/un; 515277177Srrs ret = printf("%1.3f", res); 516292043Srrs return(ret); 517277177Srrs} 518277177Srrs 519277177Srrsstatic int 520277177Srrsbr_mispredict(struct counters *cpu, int pos) 521277177Srrs{ 522277177Srrs struct counters *brctr; 523277177Srrs struct counters *unhalt; 524277177Srrs int ret; 525277177Srrs/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 526277177Srrs double br, un, con, res; 527277177Srrs con = 20.0; 528292043Srrs 529277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 530292043Srrs brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 531277177Srrs if (pos != -1) { 532277177Srrs br = brctr->vals[pos] * 1.0; 533277177Srrs un = unhalt->vals[pos] * 1.0; 534277177Srrs } else { 535277177Srrs br = brctr->sum * 1.0; 536277177Srrs un = unhalt->sum * 1.0; 537277177Srrs } 538292043Srrs res = (con * br)/un; 539292043Srrs ret = printf("%1.3f", res); 540292043Srrs return(ret); 541277177Srrs} 542277177Srrs 543277177Srrsstatic int 544277177Srrsbr_mispredictib(struct counters *cpu, int pos) 545277177Srrs{ 546277177Srrs struct counters *brctr; 547277177Srrs struct counters *unhalt; 548277177Srrs struct counters *clear, *clear2, *clear3; 549277177Srrs struct counters *uops; 550292043Srrs struct counters *recv; 551277177Srrs struct counters *iss; 552277177Srrs/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ 553277177Srrs int ret; 554292043Srrs /* 555292043Srrs * (BR_MISP_RETIRED.ALL_BRANCHES / 556292043Srrs * (BR_MISP_RETIRED.ALL_BRANCHES + 557292043Srrs * MACHINE_CLEAR.COUNT) * 558292043Srrs * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) 559292043Srrs * 560277177Srrs */ 561277177Srrs double br, cl, cl2, cl3, uo, re, un, con, res, is; 562277177Srrs con = 4.0; 563292043Srrs 564277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 565292043Srrs brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 566277177Srrs clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 567277177Srrs clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 568277177Srrs clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 569277177Srrs uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 570277177Srrs iss = find_counter(cpu, "UOPS_ISSUED.ANY"); 571277177Srrs recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 572277177Srrs if (pos != -1) { 573277177Srrs br = brctr->vals[pos] * 1.0; 574277177Srrs cl = clear->vals[pos] * 1.0; 575277177Srrs cl2 = clear2->vals[pos] * 1.0; 576277177Srrs cl3 = clear3->vals[pos] * 1.0; 577277177Srrs uo = uops->vals[pos] * 1.0; 578277177Srrs re = recv->vals[pos] * 1.0; 579277177Srrs is = iss->vals[pos] * 1.0; 580277177Srrs un = unhalt->vals[pos] * 1.0; 581277177Srrs } else { 582277177Srrs br = brctr->sum * 1.0; 583277177Srrs cl = clear->sum * 1.0; 584277177Srrs cl2 = clear2->sum * 1.0; 585277177Srrs cl3 = clear3->sum * 1.0; 586277177Srrs uo = uops->sum * 1.0; 587277177Srrs re = recv->sum * 1.0; 588277177Srrs is = iss->sum * 1.0; 589277177Srrs un = unhalt->sum * 1.0; 590277177Srrs } 591292043Srrs res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); 592292043Srrs ret = printf("%1.3f", res); 593292043Srrs return(ret); 594277177Srrs} 595277177Srrs 596292043Srrs 597277177Srrsstatic int 598292027Srrsbr_mispredict_broad(struct counters *cpu, int pos) 599292027Srrs{ 600292027Srrs struct counters *brctr; 601292027Srrs struct counters *unhalt; 602292027Srrs struct counters *clear; 603292027Srrs struct counters *uops; 604292027Srrs struct counters *uops_ret; 605292027Srrs struct counters *recv; 606292027Srrs int ret; 607292027Srrs double br, cl, uo, uo_r, re, con, un, res; 608292027Srrs 609292027Srrs con = 4.0; 610292043Srrs 611292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 612292043Srrs brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); 613292027Srrs clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 614292027Srrs uops = find_counter(cpu, "UOPS_ISSUED.ANY"); 615292027Srrs uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 616292027Srrs recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); 617292027Srrs 618292027Srrs if (pos != -1) { 619292027Srrs un = unhalt->vals[pos] * 1.0; 620292027Srrs br = brctr->vals[pos] * 1.0; 621292027Srrs cl = clear->vals[pos] * 1.0; 622292027Srrs uo = uops->vals[pos] * 1.0; 623292027Srrs uo_r = uops_ret->vals[pos] * 1.0; 624292027Srrs re = recv->vals[pos] * 1.0; 625292027Srrs } else { 626292027Srrs un = unhalt->sum * 1.0; 627292027Srrs br = brctr->sum * 1.0; 628292027Srrs cl = clear->sum * 1.0; 629292027Srrs uo = uops->sum * 1.0; 630292027Srrs uo_r = uops_ret->sum * 1.0; 631292027Srrs re = recv->sum * 1.0; 632292027Srrs } 633292027Srrs res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); 634292043Srrs ret = printf("%1.3f", res); 635292043Srrs return(ret); 636292027Srrs} 637292027Srrs 638292027Srrsstatic int 639277177Srrssplitloadib(struct counters *cpu, int pos) 640277177Srrs{ 641277177Srrs int ret; 642277177Srrs struct counters *mem; 643277177Srrs struct counters *l1d, *ldblock; 644277177Srrs struct counters *unhalt; 645277177Srrs double un, memd, res, l1, ldb; 646292043Srrs /* 647292043Srrs * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P 648292043Srrs * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 649277177Srrs */ 650277177Srrs 651277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 652277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); 653277177Srrs l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); 654277177Srrs ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); 655277177Srrs if (pos != -1) { 656277177Srrs memd = mem->vals[pos] * 1.0; 657277177Srrs l1 = l1d->vals[pos] * 1.0; 658277177Srrs ldb = ldblock->vals[pos] * 1.0; 659277177Srrs un = unhalt->vals[pos] * 1.0; 660277177Srrs } else { 661277177Srrs memd = mem->sum * 1.0; 662277177Srrs l1 = l1d->sum * 1.0; 663277177Srrs ldb = ldblock->sum * 1.0; 664277177Srrs un = unhalt->sum * 1.0; 665277177Srrs } 666292043Srrs res = ((l1 / memd) * ldb)/un; 667277177Srrs ret = printf("%1.3f", res); 668292043Srrs return(ret); 669277177Srrs} 670277177Srrs 671292043Srrs 672277177Srrsstatic int 673277177Srrssplitload(struct counters *cpu, int pos) 674277177Srrs{ 675277177Srrs int ret; 676277177Srrs struct counters *mem; 677277177Srrs struct counters *unhalt; 678277177Srrs double con, un, memd, res; 679292043Srrs/* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 680292028Srrs 681292043Srrs con = 5.0; 682292043Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 683292043Srrs mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); 684292043Srrs if (pos != -1) { 685292043Srrs memd = mem->vals[pos] * 1.0; 686292043Srrs un = unhalt->vals[pos] * 1.0; 687292043Srrs } else { 688292043Srrs memd = mem->sum * 1.0; 689292043Srrs un = unhalt->sum * 1.0; 690292043Srrs } 691292043Srrs res = (memd * con)/un; 692292043Srrs ret = printf("%1.3f", res); 693292043Srrs return(ret); 694292043Srrs} 695292043Srrs 696292043Srrs 697292043Srrsstatic int 698292043Srrssplitload_sb(struct counters *cpu, int pos) 699292043Srrs{ 700292043Srrs int ret; 701292043Srrs struct counters *mem; 702292043Srrs struct counters *unhalt; 703292043Srrs double con, un, memd, res; 704277177Srrs/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ 705277177Srrs 706277177Srrs con = 5.0; 707277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 708277177Srrs mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); 709277177Srrs if (pos != -1) { 710277177Srrs memd = mem->vals[pos] * 1.0; 711277177Srrs un = unhalt->vals[pos] * 1.0; 712277177Srrs } else { 713277177Srrs memd = mem->sum * 1.0; 714277177Srrs un = unhalt->sum * 1.0; 715277177Srrs } 716292043Srrs res = (memd * con)/un; 717277177Srrs ret = printf("%1.3f", res); 718292043Srrs return(ret); 719277177Srrs} 720277177Srrs 721292043Srrs 722277177Srrsstatic int 723292043Srrssplitstore_sb(struct counters *cpu, int pos) 724277177Srrs{ 725292043Srrs /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ 726277177Srrs int ret; 727277177Srrs struct counters *mem_split; 728277177Srrs struct counters *mem_stores; 729277177Srrs double memsplit, memstore, res; 730277177Srrs mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); 731277177Srrs mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); 732277177Srrs if (pos != -1) { 733277177Srrs memsplit = mem_split->vals[pos] * 1.0; 734277177Srrs memstore = mem_stores->vals[pos] * 1.0; 735277177Srrs } else { 736277177Srrs memsplit = mem_split->sum * 1.0; 737277177Srrs memstore = mem_stores->sum * 1.0; 738277177Srrs } 739292043Srrs res = memsplit/memstore; 740277177Srrs ret = printf("%1.3f", res); 741292043Srrs return(ret); 742277177Srrs} 743277177Srrs 744277177Srrs 745292043Srrs 746277177Srrsstatic int 747292043Srrssplitstore(struct counters *cpu, int pos) 748292043Srrs{ 749292043Srrs /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ 750292043Srrs int ret; 751292043Srrs struct counters *mem_split; 752292043Srrs struct counters *mem_stores; 753292043Srrs double memsplit, memstore, res; 754292043Srrs mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); 755292043Srrs mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); 756292043Srrs if (pos != -1) { 757292043Srrs memsplit = mem_split->vals[pos] * 1.0; 758292043Srrs memstore = mem_stores->vals[pos] * 1.0; 759292043Srrs } else { 760292043Srrs memsplit = mem_split->sum * 1.0; 761292043Srrs memstore = mem_stores->sum * 1.0; 762292043Srrs } 763292043Srrs res = memsplit/memstore; 764292043Srrs ret = printf("%1.3f", res); 765292043Srrs return(ret); 766292043Srrs} 767292043Srrs 768292043Srrs 769292043Srrsstatic int 770277177Srrscontested(struct counters *cpu, int pos) 771277177Srrs{ 772292043Srrs /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 773277177Srrs int ret; 774277177Srrs struct counters *mem; 775277177Srrs struct counters *unhalt; 776277177Srrs double con, un, memd, res; 777277177Srrs 778277177Srrs con = 60.0; 779277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 780277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 781277177Srrs if (pos != -1) { 782277177Srrs memd = mem->vals[pos] * 1.0; 783277177Srrs un = unhalt->vals[pos] * 1.0; 784277177Srrs } else { 785277177Srrs memd = mem->sum * 1.0; 786277177Srrs un = unhalt->sum * 1.0; 787277177Srrs } 788292043Srrs res = (memd * con)/un; 789277177Srrs ret = printf("%1.3f", res); 790292043Srrs return(ret); 791277177Srrs} 792277177Srrs 793277177Srrsstatic int 794277177Srrscontested_has(struct counters *cpu, int pos) 795277177Srrs{ 796292043Srrs /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 797277177Srrs int ret; 798277177Srrs struct counters *mem; 799277177Srrs struct counters *unhalt; 800277177Srrs double con, un, memd, res; 801277177Srrs 802277177Srrs con = 84.0; 803277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 804277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 805277177Srrs if (pos != -1) { 806277177Srrs memd = mem->vals[pos] * 1.0; 807277177Srrs un = unhalt->vals[pos] * 1.0; 808277177Srrs } else { 809277177Srrs memd = mem->sum * 1.0; 810277177Srrs un = unhalt->sum * 1.0; 811277177Srrs } 812292043Srrs res = (memd * con)/un; 813277177Srrs ret = printf("%1.3f", res); 814292043Srrs return(ret); 815277177Srrs} 816277177Srrs 817292027Srrsstatic int 818292027Srrscontestedbroad(struct counters *cpu, int pos) 819292027Srrs{ 820292043Srrs /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ 821292027Srrs int ret; 822292027Srrs struct counters *mem; 823292027Srrs struct counters *mem2; 824292027Srrs struct counters *unhalt; 825292027Srrs double con, un, memd, memtoo, res; 826277177Srrs 827292027Srrs con = 84.0; 828292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 829292027Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 830292043Srrs mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); 831292027Srrs 832292027Srrs if (pos != -1) { 833292027Srrs memd = mem->vals[pos] * 1.0; 834292027Srrs memtoo = mem2->vals[pos] * 1.0; 835292027Srrs un = unhalt->vals[pos] * 1.0; 836292027Srrs } else { 837292027Srrs memd = mem->sum * 1.0; 838292027Srrs memtoo = mem2->sum * 1.0; 839292027Srrs un = unhalt->sum * 1.0; 840292027Srrs } 841292043Srrs res = ((memd * con) + memtoo)/un; 842292027Srrs ret = printf("%1.3f", res); 843292043Srrs return(ret); 844292027Srrs} 845292027Srrs 846292027Srrs 847277177Srrsstatic int 848277177Srrsblockstoreforward(struct counters *cpu, int pos) 849277177Srrs{ 850292043Srrs /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ 851277177Srrs int ret; 852277177Srrs struct counters *ldb; 853277177Srrs struct counters *unhalt; 854277177Srrs double con, un, ld, res; 855277177Srrs 856277177Srrs con = 13.0; 857277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 858277177Srrs ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); 859277177Srrs if (pos != -1) { 860277177Srrs ld = ldb->vals[pos] * 1.0; 861277177Srrs un = unhalt->vals[pos] * 1.0; 862277177Srrs } else { 863277177Srrs ld = ldb->sum * 1.0; 864277177Srrs un = unhalt->sum * 1.0; 865277177Srrs } 866292043Srrs res = (ld * con)/un; 867277177Srrs ret = printf("%1.3f", res); 868292043Srrs return(ret); 869277177Srrs} 870277177Srrs 871277177Srrsstatic int 872277177Srrscache2(struct counters *cpu, int pos) 873277177Srrs{ 874292043Srrs /* ** Suspect *** 875292043Srrs * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + 876292043Srrs * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 877277177Srrs */ 878277177Srrs int ret; 879277177Srrs struct counters *mem1, *mem2, *mem3; 880277177Srrs struct counters *unhalt; 881277177Srrs double con1, con2, con3, un, me_1, me_2, me_3, res; 882277177Srrs 883277177Srrs con1 = 26.0; 884277177Srrs con2 = 43.0; 885277177Srrs con3 = 60.0; 886277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 887277177Srrs/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ 888277177Srrs mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 889277177Srrs mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 890277177Srrs mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 891277177Srrs if (pos != -1) { 892277177Srrs me_1 = mem1->vals[pos] * 1.0; 893277177Srrs me_2 = mem2->vals[pos] * 1.0; 894277177Srrs me_3 = mem3->vals[pos] * 1.0; 895277177Srrs un = unhalt->vals[pos] * 1.0; 896277177Srrs } else { 897277177Srrs me_1 = mem1->sum * 1.0; 898277177Srrs me_2 = mem2->sum * 1.0; 899277177Srrs me_3 = mem3->sum * 1.0; 900277177Srrs un = unhalt->sum * 1.0; 901277177Srrs } 902292043Srrs res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; 903277177Srrs ret = printf("%1.3f", res); 904292043Srrs return(ret); 905277177Srrs} 906277177Srrs 907277177Srrsstatic int 908277177Srrsdatasharing(struct counters *cpu, int pos) 909277177Srrs{ 910292043Srrs /* 911292043Srrs * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 912277177Srrs */ 913277177Srrs int ret; 914277177Srrs struct counters *mem; 915277177Srrs struct counters *unhalt; 916277177Srrs double con, res, me, un; 917277177Srrs 918277177Srrs con = 43.0; 919277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 920277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 921277177Srrs if (pos != -1) { 922277177Srrs me = mem->vals[pos] * 1.0; 923277177Srrs un = unhalt->vals[pos] * 1.0; 924277177Srrs } else { 925277177Srrs me = mem->sum * 1.0; 926277177Srrs un = unhalt->sum * 1.0; 927277177Srrs } 928292043Srrs res = (me * con)/un; 929277177Srrs ret = printf("%1.3f", res); 930292043Srrs return(ret); 931277177Srrs 932277177Srrs} 933277177Srrs 934277177Srrs 935277177Srrsstatic int 936277177Srrsdatasharing_has(struct counters *cpu, int pos) 937277177Srrs{ 938292043Srrs /* 939292043Srrs * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 940277177Srrs */ 941277177Srrs int ret; 942277177Srrs struct counters *mem; 943277177Srrs struct counters *unhalt; 944277177Srrs double con, res, me, un; 945277177Srrs 946277177Srrs con = 72.0; 947277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 948277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 949277177Srrs if (pos != -1) { 950277177Srrs me = mem->vals[pos] * 1.0; 951277177Srrs un = unhalt->vals[pos] * 1.0; 952277177Srrs } else { 953277177Srrs me = mem->sum * 1.0; 954277177Srrs un = unhalt->sum * 1.0; 955277177Srrs } 956292043Srrs res = (me * con)/un; 957277177Srrs ret = printf("%1.3f", res); 958292043Srrs return(ret); 959277177Srrs 960277177Srrs} 961277177Srrs 962277177Srrs 963277177Srrsstatic int 964277177Srrscache2ib(struct counters *cpu, int pos) 965277177Srrs{ 966292043Srrs /* 967292043Srrs * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 968277177Srrs */ 969277177Srrs int ret; 970277177Srrs struct counters *mem; 971277177Srrs struct counters *unhalt; 972277177Srrs double con, un, me, res; 973277177Srrs 974277177Srrs con = 29.0; 975277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 976277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 977277177Srrs if (pos != -1) { 978277177Srrs me = mem->vals[pos] * 1.0; 979277177Srrs un = unhalt->vals[pos] * 1.0; 980277177Srrs } else { 981277177Srrs me = mem->sum * 1.0; 982277177Srrs un = unhalt->sum * 1.0; 983277177Srrs } 984292043Srrs res = (con * me)/un; 985277177Srrs ret = printf("%1.3f", res); 986292043Srrs return(ret); 987277177Srrs} 988277177Srrs 989277177Srrsstatic int 990277177Srrscache2has(struct counters *cpu, int pos) 991277177Srrs{ 992277177Srrs /* 993277177Srrs * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ 994292043Srrs * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + 995292043Srrs * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) 996292043Srrs * / CPU_CLK_UNHALTED.THREAD_P 997277177Srrs */ 998277177Srrs int ret; 999277177Srrs struct counters *mem1, *mem2, *mem3; 1000277177Srrs struct counters *unhalt; 1001277177Srrs double con1, con2, con3, un, me1, me2, me3, res; 1002277177Srrs 1003277177Srrs con1 = 36.0; 1004277177Srrs con2 = 72.0; 1005277177Srrs con3 = 84.0; 1006277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1007277177Srrs mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); 1008277177Srrs mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); 1009277177Srrs mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); 1010277177Srrs if (pos != -1) { 1011277177Srrs me1 = mem1->vals[pos] * 1.0; 1012277177Srrs me2 = mem2->vals[pos] * 1.0; 1013277177Srrs me3 = mem3->vals[pos] * 1.0; 1014277177Srrs un = unhalt->vals[pos] * 1.0; 1015277177Srrs } else { 1016277177Srrs me1 = mem1->sum * 1.0; 1017277177Srrs me2 = mem2->sum * 1.0; 1018277177Srrs me3 = mem3->sum * 1.0; 1019277177Srrs un = unhalt->sum * 1.0; 1020277177Srrs } 1021292043Srrs res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; 1022277177Srrs ret = printf("%1.3f", res); 1023292043Srrs return(ret); 1024277177Srrs} 1025277177Srrs 1026292043Srrs 1027277177Srrsstatic int 1028292027Srrscache2broad(struct counters *cpu, int pos) 1029292027Srrs{ 1030292043Srrs /* 1031292043Srrs * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) 1032292027Srrs */ 1033292027Srrs int ret; 1034292027Srrs struct counters *mem; 1035292027Srrs struct counters *unhalt; 1036292027Srrs double con, un, me, res; 1037292027Srrs 1038292027Srrs con = 36.0; 1039292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1040292027Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); 1041292027Srrs if (pos != -1) { 1042292027Srrs me = mem->vals[pos] * 1.0; 1043292027Srrs un = unhalt->vals[pos] * 1.0; 1044292027Srrs } else { 1045292027Srrs me = mem->sum * 1.0; 1046292027Srrs un = unhalt->sum * 1.0; 1047292027Srrs } 1048292043Srrs res = (con * me)/un; 1049292027Srrs ret = printf("%1.3f", res); 1050292043Srrs return(ret); 1051292027Srrs} 1052292027Srrs 1053292027Srrs 1054292027Srrsstatic int 1055277177Srrscache1(struct counters *cpu, int pos) 1056277177Srrs{ 1057292043Srrs /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1058277177Srrs int ret; 1059277177Srrs struct counters *mem; 1060277177Srrs struct counters *unhalt; 1061277177Srrs double con, un, me, res; 1062277177Srrs 1063277177Srrs con = 180.0; 1064277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1065277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); 1066277177Srrs if (pos != -1) { 1067277177Srrs me = mem->vals[pos] * 1.0; 1068277177Srrs un = unhalt->vals[pos] * 1.0; 1069277177Srrs } else { 1070277177Srrs me = mem->sum * 1.0; 1071277177Srrs un = unhalt->sum * 1.0; 1072277177Srrs } 1073292043Srrs res = (me * con)/un; 1074277177Srrs ret = printf("%1.3f", res); 1075292043Srrs return(ret); 1076277177Srrs} 1077277177Srrs 1078277177Srrsstatic int 1079277177Srrscache1ib(struct counters *cpu, int pos) 1080277177Srrs{ 1081292043Srrs /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1082277177Srrs int ret; 1083277177Srrs struct counters *mem; 1084277177Srrs struct counters *unhalt; 1085277177Srrs double con, un, me, res; 1086277177Srrs 1087277177Srrs con = 180.0; 1088277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1089277177Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); 1090277177Srrs if (pos != -1) { 1091277177Srrs me = mem->vals[pos] * 1.0; 1092277177Srrs un = unhalt->vals[pos] * 1.0; 1093277177Srrs } else { 1094277177Srrs me = mem->sum * 1.0; 1095277177Srrs un = unhalt->sum * 1.0; 1096277177Srrs } 1097292043Srrs res = (me * con)/un; 1098277177Srrs ret = printf("%1.3f", res); 1099292043Srrs return(ret); 1100277177Srrs} 1101277177Srrs 1102277177Srrs 1103277177Srrsstatic int 1104292027Srrscache1broad(struct counters *cpu, int pos) 1105292027Srrs{ 1106292043Srrs /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ 1107292027Srrs int ret; 1108292027Srrs struct counters *mem; 1109292027Srrs struct counters *unhalt; 1110292027Srrs double con, un, me, res; 1111292027Srrs 1112292027Srrs con = 180.0; 1113292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1114292027Srrs mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); 1115292027Srrs if (pos != -1) { 1116292027Srrs me = mem->vals[pos] * 1.0; 1117292027Srrs un = unhalt->vals[pos] * 1.0; 1118292027Srrs } else { 1119292027Srrs me = mem->sum * 1.0; 1120292027Srrs un = unhalt->sum * 1.0; 1121292027Srrs } 1122292043Srrs res = (me * con)/un; 1123292027Srrs ret = printf("%1.3f", res); 1124292043Srrs return(ret); 1125292027Srrs} 1126292027Srrs 1127292027Srrs 1128292027Srrsstatic int 1129277177Srrsdtlb_missload(struct counters *cpu, int pos) 1130277177Srrs{ 1131292043Srrs /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ 1132277177Srrs int ret; 1133277177Srrs struct counters *dtlb_m, *dtlb_d; 1134277177Srrs struct counters *unhalt; 1135277177Srrs double con, un, d1, d2, res; 1136277177Srrs 1137277177Srrs con = 7.0; 1138277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1139277177Srrs dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); 1140277177Srrs dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); 1141277177Srrs if (pos != -1) { 1142277177Srrs d1 = dtlb_m->vals[pos] * 1.0; 1143277177Srrs d2 = dtlb_d->vals[pos] * 1.0; 1144277177Srrs un = unhalt->vals[pos] * 1.0; 1145277177Srrs } else { 1146277177Srrs d1 = dtlb_m->sum * 1.0; 1147277177Srrs d2 = dtlb_d->sum * 1.0; 1148277177Srrs un = unhalt->sum * 1.0; 1149277177Srrs } 1150292043Srrs res = ((d1 * con) + d2)/un; 1151277177Srrs ret = printf("%1.3f", res); 1152292043Srrs return(ret); 1153277177Srrs} 1154277177Srrs 1155277177Srrsstatic int 1156277177Srrsdtlb_missstore(struct counters *cpu, int pos) 1157277177Srrs{ 1158292043Srrs /* 1159292043Srrs * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 1160292043Srrs * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 1161277177Srrs */ 1162292043Srrs int ret; 1163292043Srrs struct counters *dtsb_m, *dtsb_d; 1164292043Srrs struct counters *unhalt; 1165292043Srrs double con, un, d1, d2, res; 1166277177Srrs 1167292043Srrs con = 7.0; 1168292043Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1169292043Srrs dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); 1170292043Srrs dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); 1171292043Srrs if (pos != -1) { 1172292043Srrs d1 = dtsb_m->vals[pos] * 1.0; 1173292043Srrs d2 = dtsb_d->vals[pos] * 1.0; 1174292043Srrs un = unhalt->vals[pos] * 1.0; 1175292043Srrs } else { 1176292043Srrs d1 = dtsb_m->sum * 1.0; 1177292043Srrs d2 = dtsb_d->sum * 1.0; 1178292043Srrs un = unhalt->sum * 1.0; 1179292043Srrs } 1180292043Srrs res = ((d1 * con) + d2)/un; 1181292043Srrs ret = printf("%1.3f", res); 1182292043Srrs return(ret); 1183277177Srrs} 1184277177Srrs 1185277177Srrsstatic int 1186277177Srrsitlb_miss(struct counters *cpu, int pos) 1187277177Srrs{ 1188277177Srrs /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ 1189277177Srrs int ret; 1190277177Srrs struct counters *itlb; 1191277177Srrs struct counters *unhalt; 1192277177Srrs double un, d1, res; 1193277177Srrs 1194277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1195277177Srrs itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1196277177Srrs if (pos != -1) { 1197277177Srrs d1 = itlb->vals[pos] * 1.0; 1198277177Srrs un = unhalt->vals[pos] * 1.0; 1199277177Srrs } else { 1200277177Srrs d1 = itlb->sum * 1.0; 1201277177Srrs un = unhalt->sum * 1.0; 1202277177Srrs } 1203292043Srrs res = d1/un; 1204277177Srrs ret = printf("%1.3f", res); 1205292043Srrs return(ret); 1206277177Srrs} 1207277177Srrs 1208292027Srrs 1209277177Srrsstatic int 1210292027Srrsitlb_miss_broad(struct counters *cpu, int pos) 1211292027Srrs{ 1212292043Srrs /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ 1213292027Srrs int ret; 1214292027Srrs struct counters *itlb; 1215292027Srrs struct counters *unhalt; 1216292027Srrs struct counters *four_k; 1217292027Srrs double un, d1, res, k; 1218292027Srrs 1219292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1220292027Srrs itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1221292027Srrs four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); 1222292027Srrs if (pos != -1) { 1223292027Srrs d1 = itlb->vals[pos] * 1.0; 1224292027Srrs un = unhalt->vals[pos] * 1.0; 1225292027Srrs k = four_k->vals[pos] * 1.0; 1226292027Srrs } else { 1227292027Srrs d1 = itlb->sum * 1.0; 1228292027Srrs un = unhalt->sum * 1.0; 1229292027Srrs k = four_k->sum * 1.0; 1230292027Srrs } 1231292043Srrs res = (7.0 * k + d1)/un; 1232292027Srrs ret = printf("%1.3f", res); 1233292043Srrs return(ret); 1234292027Srrs} 1235292027Srrs 1236292027Srrs 1237292027Srrsstatic int 1238277177Srrsicache_miss(struct counters *cpu, int pos) 1239277177Srrs{ 1240292043Srrs /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ 1241277177Srrs 1242277177Srrs int ret; 1243277177Srrs struct counters *itlb, *icache; 1244277177Srrs struct counters *unhalt; 1245277177Srrs double un, d1, ic, res; 1246277177Srrs 1247277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1248277177Srrs itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); 1249277177Srrs icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); 1250277177Srrs if (pos != -1) { 1251277177Srrs d1 = itlb->vals[pos] * 1.0; 1252277177Srrs ic = icache->vals[pos] * 1.0; 1253277177Srrs un = unhalt->vals[pos] * 1.0; 1254277177Srrs } else { 1255277177Srrs d1 = itlb->sum * 1.0; 1256277177Srrs ic = icache->sum * 1.0; 1257277177Srrs un = unhalt->sum * 1.0; 1258277177Srrs } 1259292043Srrs res = (ic-d1)/un; 1260277177Srrs ret = printf("%1.3f", res); 1261292043Srrs return(ret); 1262277177Srrs 1263277177Srrs} 1264277177Srrs 1265277177Srrsstatic int 1266277177Srrsicache_miss_has(struct counters *cpu, int pos) 1267277177Srrs{ 1268277177Srrs /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ 1269277177Srrs 1270277177Srrs int ret; 1271277177Srrs struct counters *icache; 1272277177Srrs struct counters *unhalt; 1273277177Srrs double un, con, ic, res; 1274277177Srrs 1275277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1276277177Srrs icache = find_counter(cpu, "ICACHE.MISSES"); 1277277177Srrs con = 36.0; 1278277177Srrs if (pos != -1) { 1279277177Srrs ic = icache->vals[pos] * 1.0; 1280277177Srrs un = unhalt->vals[pos] * 1.0; 1281277177Srrs } else { 1282277177Srrs ic = icache->sum * 1.0; 1283277177Srrs un = unhalt->sum * 1.0; 1284277177Srrs } 1285292043Srrs res = (con * ic)/un; 1286277177Srrs ret = printf("%1.3f", res); 1287292043Srrs return(ret); 1288277177Srrs 1289277177Srrs} 1290277177Srrs 1291277177Srrsstatic int 1292277177Srrslcp_stall(struct counters *cpu, int pos) 1293277177Srrs{ 1294292043Srrs /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ 1295277177Srrs int ret; 1296277177Srrs struct counters *ild; 1297277177Srrs struct counters *unhalt; 1298277177Srrs double un, d1, res; 1299277177Srrs 1300277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1301277177Srrs ild = find_counter(cpu, "ILD_STALL.LCP"); 1302277177Srrs if (pos != -1) { 1303277177Srrs d1 = ild->vals[pos] * 1.0; 1304277177Srrs un = unhalt->vals[pos] * 1.0; 1305277177Srrs } else { 1306277177Srrs d1 = ild->sum * 1.0; 1307277177Srrs un = unhalt->sum * 1.0; 1308277177Srrs } 1309292043Srrs res = d1/un; 1310277177Srrs ret = printf("%1.3f", res); 1311292043Srrs return(ret); 1312277177Srrs 1313277177Srrs} 1314277177Srrs 1315277177Srrs 1316277177Srrsstatic int 1317277177Srrsfrontendstall(struct counters *cpu, int pos) 1318277177Srrs{ 1319292043Srrs /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ 1320277177Srrs int ret; 1321277177Srrs struct counters *idq; 1322277177Srrs struct counters *unhalt; 1323277177Srrs double con, un, id, res; 1324277177Srrs 1325277177Srrs con = 4.0; 1326277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1327277177Srrs idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); 1328277177Srrs if (pos != -1) { 1329277177Srrs id = idq->vals[pos] * 1.0; 1330277177Srrs un = unhalt->vals[pos] * 1.0; 1331277177Srrs } else { 1332277177Srrs id = idq->sum * 1.0; 1333277177Srrs un = unhalt->sum * 1.0; 1334277177Srrs } 1335292043Srrs res = id/(un * con); 1336277177Srrs ret = printf("%1.3f", res); 1337292043Srrs return(ret); 1338277177Srrs} 1339277177Srrs 1340277177Srrsstatic int 1341277177Srrsclears(struct counters *cpu, int pos) 1342277177Srrs{ 1343292043Srrs /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) 1344292043Srrs * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ 1345292043Srrs 1346277177Srrs int ret; 1347277177Srrs struct counters *clr1, *clr2, *clr3; 1348277177Srrs struct counters *unhalt; 1349277177Srrs double con, un, cl1, cl2, cl3, res; 1350277177Srrs 1351277177Srrs con = 100.0; 1352277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1353277177Srrs clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1354277177Srrs clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1355277177Srrs clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1356292043Srrs 1357277177Srrs if (pos != -1) { 1358277177Srrs cl1 = clr1->vals[pos] * 1.0; 1359277177Srrs cl2 = clr2->vals[pos] * 1.0; 1360277177Srrs cl3 = clr3->vals[pos] * 1.0; 1361277177Srrs un = unhalt->vals[pos] * 1.0; 1362277177Srrs } else { 1363277177Srrs cl1 = clr1->sum * 1.0; 1364277177Srrs cl2 = clr2->sum * 1.0; 1365277177Srrs cl3 = clr3->sum * 1.0; 1366277177Srrs un = unhalt->sum * 1.0; 1367277177Srrs } 1368292043Srrs res = ((cl1 + cl2 + cl3) * con)/un; 1369277177Srrs ret = printf("%1.3f", res); 1370292043Srrs return(ret); 1371277177Srrs} 1372277177Srrs 1373292043Srrs 1374292043Srrs 1375277177Srrsstatic int 1376292027Srrsclears_broad(struct counters *cpu, int pos) 1377292027Srrs{ 1378292027Srrs int ret; 1379292027Srrs struct counters *clr1, *clr2, *clr3, *cyc; 1380292027Srrs struct counters *unhalt; 1381292027Srrs double con, un, cl1, cl2, cl3, cy, res; 1382292027Srrs 1383292027Srrs con = 100.0; 1384292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1385292027Srrs clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); 1386292027Srrs clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); 1387292027Srrs clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); 1388292027Srrs cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); 1389292027Srrs if (pos != -1) { 1390292027Srrs cl1 = clr1->vals[pos] * 1.0; 1391292027Srrs cl2 = clr2->vals[pos] * 1.0; 1392292027Srrs cl3 = clr3->vals[pos] * 1.0; 1393292027Srrs cy = cyc->vals[pos] * 1.0; 1394292027Srrs un = unhalt->vals[pos] * 1.0; 1395292027Srrs } else { 1396292027Srrs cl1 = clr1->sum * 1.0; 1397292027Srrs cl2 = clr2->sum * 1.0; 1398292027Srrs cl3 = clr3->sum * 1.0; 1399292027Srrs cy = cyc->sum * 1.0; 1400292027Srrs un = unhalt->sum * 1.0; 1401292027Srrs } 1402292027Srrs /* Formula not listed but extrapulated to add the cy ?? */ 1403292043Srrs res = ((cl1 + cl2 + cl3 + cy) * con)/un; 1404292027Srrs ret = printf("%1.3f", res); 1405292043Srrs return(ret); 1406292027Srrs} 1407292027Srrs 1408292043Srrs 1409292043Srrs 1410292043Srrs 1411292043Srrs 1412292027Srrsstatic int 1413277177Srrsmicroassist(struct counters *cpu, int pos) 1414277177Srrs{ 1415277177Srrs /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ 1416277177Srrs int ret; 1417277177Srrs struct counters *idq; 1418277177Srrs struct counters *unhalt; 1419277177Srrs double un, id, res, con; 1420277177Srrs 1421277177Srrs con = 4.0; 1422277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1423277177Srrs idq = find_counter(cpu, "IDQ.MS_UOPS"); 1424277177Srrs if (pos != -1) { 1425277177Srrs id = idq->vals[pos] * 1.0; 1426277177Srrs un = unhalt->vals[pos] * 1.0; 1427277177Srrs } else { 1428277177Srrs id = idq->sum * 1.0; 1429277177Srrs un = unhalt->sum * 1.0; 1430277177Srrs } 1431292043Srrs res = id/(un * con); 1432277177Srrs ret = printf("%1.3f", res); 1433292043Srrs return(ret); 1434277177Srrs} 1435277177Srrs 1436292043Srrs 1437292027Srrsstatic int 1438292027Srrsmicroassist_broad(struct counters *cpu, int pos) 1439292027Srrs{ 1440292027Srrs int ret; 1441292027Srrs struct counters *idq; 1442292027Srrs struct counters *unhalt; 1443292027Srrs struct counters *uopiss; 1444292027Srrs struct counters *uopret; 1445292027Srrs double un, id, res, con, uoi, uor; 1446277177Srrs 1447292027Srrs con = 4.0; 1448292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1449292027Srrs idq = find_counter(cpu, "IDQ.MS_UOPS"); 1450292027Srrs uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); 1451292027Srrs uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1452292027Srrs if (pos != -1) { 1453292027Srrs id = idq->vals[pos] * 1.0; 1454292027Srrs un = unhalt->vals[pos] * 1.0; 1455292027Srrs uoi = uopiss->vals[pos] * 1.0; 1456292027Srrs uor = uopret->vals[pos] * 1.0; 1457292027Srrs } else { 1458292027Srrs id = idq->sum * 1.0; 1459292027Srrs un = unhalt->sum * 1.0; 1460292027Srrs uoi = uopiss->sum * 1.0; 1461292027Srrs uor = uopret->sum * 1.0; 1462292027Srrs } 1463292043Srrs res = (uor/uoi) * (id/(un * con)); 1464292027Srrs ret = printf("%1.3f", res); 1465292043Srrs return(ret); 1466292027Srrs} 1467292027Srrs 1468292043Srrs 1469277177Srrsstatic int 1470277177Srrsaliasing(struct counters *cpu, int pos) 1471277177Srrs{ 1472292043Srrs /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1473292043Srrs int ret; 1474277177Srrs struct counters *ld; 1475277177Srrs struct counters *unhalt; 1476277177Srrs double un, lds, con, res; 1477277177Srrs 1478277177Srrs con = 5.0; 1479277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1480277177Srrs ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1481277177Srrs if (pos != -1) { 1482277177Srrs lds = ld->vals[pos] * 1.0; 1483277177Srrs un = unhalt->vals[pos] * 1.0; 1484277177Srrs } else { 1485277177Srrs lds = ld->sum * 1.0; 1486277177Srrs un = unhalt->sum * 1.0; 1487277177Srrs } 1488292043Srrs res = (lds * con)/un; 1489277177Srrs ret = printf("%1.3f", res); 1490292043Srrs return(ret); 1491277177Srrs} 1492277177Srrs 1493277177Srrsstatic int 1494292027Srrsaliasing_broad(struct counters *cpu, int pos) 1495292027Srrs{ 1496292043Srrs /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ 1497292043Srrs int ret; 1498292027Srrs struct counters *ld; 1499292027Srrs struct counters *unhalt; 1500292027Srrs double un, lds, con, res; 1501292027Srrs 1502292027Srrs con = 7.0; 1503292027Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1504292027Srrs ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); 1505292027Srrs if (pos != -1) { 1506292027Srrs lds = ld->vals[pos] * 1.0; 1507292027Srrs un = unhalt->vals[pos] * 1.0; 1508292027Srrs } else { 1509292027Srrs lds = ld->sum * 1.0; 1510292027Srrs un = unhalt->sum * 1.0; 1511292027Srrs } 1512292043Srrs res = (lds * con)/un; 1513292027Srrs ret = printf("%1.3f", res); 1514292043Srrs return(ret); 1515292027Srrs} 1516292027Srrs 1517292027Srrs 1518292027Srrsstatic int 1519277177Srrsfpassists(struct counters *cpu, int pos) 1520277177Srrs{ 1521277177Srrs /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ 1522292043Srrs int ret; 1523277177Srrs struct counters *fp; 1524277177Srrs struct counters *inst; 1525277177Srrs double un, fpd, res; 1526277177Srrs 1527277177Srrs inst = find_counter(cpu, "INST_RETIRED.ANY_P"); 1528277177Srrs fp = find_counter(cpu, "FP_ASSIST.ANY"); 1529277177Srrs if (pos != -1) { 1530277177Srrs fpd = fp->vals[pos] * 1.0; 1531277177Srrs un = inst->vals[pos] * 1.0; 1532277177Srrs } else { 1533277177Srrs fpd = fp->sum * 1.0; 1534277177Srrs un = inst->sum * 1.0; 1535277177Srrs } 1536292043Srrs res = fpd/un; 1537277177Srrs ret = printf("%1.3f", res); 1538292043Srrs return(ret); 1539277177Srrs} 1540277177Srrs 1541277177Srrsstatic int 1542277177Srrsotherassistavx(struct counters *cpu, int pos) 1543277177Srrs{ 1544292043Srrs /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1545292043Srrs int ret; 1546277177Srrs struct counters *oth; 1547277177Srrs struct counters *unhalt; 1548277177Srrs double un, ot, con, res; 1549277177Srrs 1550277177Srrs con = 75.0; 1551277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1552277177Srrs oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); 1553277177Srrs if (pos != -1) { 1554277177Srrs ot = oth->vals[pos] * 1.0; 1555277177Srrs un = unhalt->vals[pos] * 1.0; 1556277177Srrs } else { 1557277177Srrs ot = oth->sum * 1.0; 1558277177Srrs un = unhalt->sum * 1.0; 1559277177Srrs } 1560292043Srrs res = (ot * con)/un; 1561277177Srrs ret = printf("%1.3f", res); 1562292043Srrs return(ret); 1563277177Srrs} 1564277177Srrs 1565277177Srrsstatic int 1566277177Srrsotherassistsse(struct counters *cpu, int pos) 1567277177Srrs{ 1568277177Srrs 1569292043Srrs int ret; 1570277177Srrs struct counters *oth; 1571277177Srrs struct counters *unhalt; 1572277177Srrs double un, ot, con, res; 1573277177Srrs 1574292043Srrs /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ 1575277177Srrs con = 75.0; 1576277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1577277177Srrs oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); 1578277177Srrs if (pos != -1) { 1579277177Srrs ot = oth->vals[pos] * 1.0; 1580277177Srrs un = unhalt->vals[pos] * 1.0; 1581277177Srrs } else { 1582277177Srrs ot = oth->sum * 1.0; 1583277177Srrs un = unhalt->sum * 1.0; 1584277177Srrs } 1585292043Srrs res = (ot * con)/un; 1586277177Srrs ret = printf("%1.3f", res); 1587292043Srrs return(ret); 1588277177Srrs} 1589277177Srrs 1590277177Srrsstatic int 1591277177Srrsefficiency1(struct counters *cpu, int pos) 1592277177Srrs{ 1593277177Srrs 1594292043Srrs int ret; 1595277177Srrs struct counters *uops; 1596277177Srrs struct counters *unhalt; 1597277177Srrs double un, ot, con, res; 1598277177Srrs 1599292043Srrs /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ 1600277177Srrs con = 4.0; 1601277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1602277177Srrs uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); 1603277177Srrs if (pos != -1) { 1604277177Srrs ot = uops->vals[pos] * 1.0; 1605277177Srrs un = unhalt->vals[pos] * 1.0; 1606277177Srrs } else { 1607277177Srrs ot = uops->sum * 1.0; 1608277177Srrs un = unhalt->sum * 1.0; 1609277177Srrs } 1610292043Srrs res = ot/(con * un); 1611277177Srrs ret = printf("%1.3f", res); 1612292043Srrs return(ret); 1613277177Srrs} 1614277177Srrs 1615277177Srrsstatic int 1616277177Srrsefficiency2(struct counters *cpu, int pos) 1617277177Srrs{ 1618277177Srrs 1619292043Srrs int ret; 1620277177Srrs struct counters *uops; 1621277177Srrs struct counters *unhalt; 1622277177Srrs double un, ot, res; 1623277177Srrs 1624292043Srrs /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ 1625277177Srrs unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); 1626277177Srrs uops = find_counter(cpu, "INST_RETIRED.ANY_P"); 1627277177Srrs if (pos != -1) { 1628277177Srrs ot = uops->vals[pos] * 1.0; 1629277177Srrs un = unhalt->vals[pos] * 1.0; 1630277177Srrs } else { 1631277177Srrs ot = uops->sum * 1.0; 1632277177Srrs un = unhalt->sum * 1.0; 1633277177Srrs } 1634292043Srrs res = un/ot; 1635277177Srrs ret = printf("%1.3f", res); 1636292043Srrs return(ret); 1637277177Srrs} 1638277177Srrs 1639292043Srrs#define SANDY_BRIDGE_COUNT 20 1640277177Srrsstatic struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { 1641292043Srrs/*01*/ { "allocstall1", "thresh > .05", 1642292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", 1643292043Srrs allocstall1, 2 }, 1644292043Srrs/* -- not defined for SB right (partial-rat_stalls) 02*/ 1645292043Srrs { "allocstall2", "thresh > .05", 1646292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", 1647292043Srrs allocstall2, 2 }, 1648292043Srrs/*03*/ { "br_miss", "thresh >= .2", 1649292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1650292043Srrs br_mispredict, 2 }, 1651292043Srrs/*04*/ { "splitload", "thresh >= .1", 1652292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", 1653292043Srrs splitload_sb, 2 }, 1654292043Srrs/* 05*/ { "splitstore", "thresh >= .01", 1655292043Srrs "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", 1656292043Srrs splitstore_sb, 2 }, 1657292043Srrs/*06*/ { "contested", "thresh >= .05", 1658292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1659292043Srrs contested, 2 }, 1660292043Srrs/*07*/ { "blockstorefwd", "thresh >= .05", 1661292043Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1662292043Srrs blockstoreforward, 2 }, 1663292043Srrs/*08*/ { "cache2", "thresh >= .2", 1664292043Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1665292043Srrs cache2, 4 }, 1666292043Srrs/*09*/ { "cache1", "thresh >= .2", 1667292043Srrs "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1668292043Srrs cache1, 2 }, 1669292043Srrs/*10*/ { "dtlbmissload", "thresh >= .1", 1670292043Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1671292043Srrs dtlb_missload, 3 }, 1672292043Srrs/*11*/ { "dtlbmissstore", "thresh >= .05", 1673292043Srrs "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1674292043Srrs dtlb_missstore, 3 }, 1675292043Srrs/*12*/ { "frontendstall", "thresh >= .15", 1676292043Srrs "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1677292043Srrs frontendstall, 2 }, 1678292043Srrs/*13*/ { "clears", "thresh >= .02", 1679292043Srrs "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1680292043Srrs clears, 4 }, 1681292043Srrs/*14*/ { "microassist", "thresh >= .05", 1682292043Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1683292043Srrs microassist, 2 }, 1684292043Srrs/*15*/ { "aliasing_4k", "thresh >= .1", 1685292043Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1686292043Srrs aliasing, 2 }, 1687292043Srrs/*16*/ { "fpassist", "look for a excessive value", 1688292043Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1689292043Srrs fpassists, 2 }, 1690292043Srrs/*17*/ { "otherassistavx", "look for a excessive value", 1691292043Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1692292043Srrs otherassistavx, 2}, 1693292043Srrs/*18*/ { "otherassistsse", "look for a excessive value", 1694292043Srrs "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1695292043Srrs otherassistsse, 2 }, 1696292043Srrs/*19*/ { "eff1", "thresh < .9", 1697292043Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1698292043Srrs efficiency1, 2 }, 1699292043Srrs/*20*/ { "eff2", "thresh > 1.0", 1700292043Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1701292043Srrs efficiency2, 2 }, 1702277177Srrs}; 1703277177Srrs 1704277177Srrs 1705277177Srrs#define IVY_BRIDGE_COUNT 21 1706277177Srrsstatic struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { 1707292043Srrs/*1*/ { "eff1", "thresh < .75", 1708292043Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1709292043Srrs efficiency1, 2 }, 1710292043Srrs/*2*/ { "eff2", "thresh > 1.0", 1711292043Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1712292043Srrs efficiency2, 2 }, 1713292043Srrs/*3*/ { "itlbmiss", "thresh > .05", 1714292043Srrs "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1715292043Srrs itlb_miss, 2 }, 1716292043Srrs/*4*/ { "icachemiss", "thresh > .05", 1717292043Srrs "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1718292043Srrs icache_miss, 3 }, 1719292043Srrs/*5*/ { "lcpstall", "thresh > .05", 1720292043Srrs "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1721292043Srrs lcp_stall, 2 }, 1722292043Srrs/*6*/ { "cache1", "thresh >= .2", 1723292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1724292043Srrs cache1ib, 2 }, 1725292043Srrs/*7*/ { "cache2", "thresh >= .2", 1726292043Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1727292043Srrs cache2ib, 2 }, 1728292043Srrs/*8*/ { "contested", "thresh >= .05", 1729292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1730292043Srrs contested, 2 }, 1731292043Srrs/*9*/ { "datashare", "thresh >= .05", 1732292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1733292043Srrs datasharing, 2 }, 1734292043Srrs/*10*/ { "blockstorefwd", "thresh >= .05", 1735292043Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1736292043Srrs blockstoreforward, 2 }, 1737292043Srrs/*11*/ { "splitload", "thresh >= .1", 1738292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", 1739292043Srrs splitloadib, 4 }, 1740292043Srrs/*12*/ { "splitstore", "thresh >= .01", 1741292043Srrs "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1742292043Srrs splitstore, 2 }, 1743292043Srrs/*13*/ { "aliasing_4k", "thresh >= .1", 1744292043Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1745292043Srrs aliasing, 2 }, 1746292043Srrs/*14*/ { "dtlbmissload", "thresh >= .1", 1747292043Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1748292043Srrs dtlb_missload , 3}, 1749292043Srrs/*15*/ { "dtlbmissstore", "thresh >= .05", 1750292043Srrs "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1751292043Srrs dtlb_missstore, 3 }, 1752292043Srrs/*16*/ { "br_miss", "thresh >= .2", 1753292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1754292043Srrs br_mispredictib, 8 }, 1755292043Srrs/*17*/ { "clears", "thresh >= .02", 1756292043Srrs "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1757292043Srrs clears, 4 }, 1758292043Srrs/*18*/ { "microassist", "thresh >= .05", 1759292043Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1760292043Srrs microassist, 2 }, 1761292043Srrs/*19*/ { "fpassist", "look for a excessive value", 1762292043Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1763292043Srrs fpassists, 2 }, 1764292043Srrs/*20*/ { "otherassistavx", "look for a excessive value", 1765292043Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1766292043Srrs otherassistavx , 2}, 1767292043Srrs/*21*/ { "otherassistsse", "look for a excessive value", 1768292043Srrs "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1769292043Srrs otherassistsse, 2 }, 1770277177Srrs}; 1771277177Srrs 1772277177Srrs#define HASWELL_COUNT 20 1773277177Srrsstatic struct cpu_entry haswell[HASWELL_COUNT] = { 1774292043Srrs/*1*/ { "eff1", "thresh < .75", 1775292043Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1776292043Srrs efficiency1, 2 }, 1777292043Srrs/*2*/ { "eff2", "thresh > 1.0", 1778292043Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1779292043Srrs efficiency2, 2 }, 1780292043Srrs/*3*/ { "itlbmiss", "thresh > .05", 1781292043Srrs "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1782292043Srrs itlb_miss, 2 }, 1783292043Srrs/*4*/ { "icachemiss", "thresh > .05", 1784292043Srrs "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1785292043Srrs icache_miss_has, 2 }, 1786292043Srrs/*5*/ { "lcpstall", "thresh > .05", 1787292043Srrs "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1788292043Srrs lcp_stall, 2 }, 1789292043Srrs/*6*/ { "cache1", "thresh >= .2", 1790292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1791292043Srrs cache1ib, 2 }, 1792292043Srrs/*7*/ { "cache2", "thresh >= .2", 1793292043Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1794292043Srrs cache2has, 4 }, 1795292043Srrs/*8*/ { "contested", "thresh >= .05", 1796292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1797292043Srrs contested_has, 2 }, 1798292043Srrs/*9*/ { "datashare", "thresh >= .05", 1799292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1800292043Srrs datasharing_has, 2 }, 1801292043Srrs/*10*/ { "blockstorefwd", "thresh >= .05", 1802292043Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1803292043Srrs blockstoreforward, 2 }, 1804292043Srrs/*11*/ { "splitload", "thresh >= .1", 1805292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", 1806292043Srrs splitload , 2}, 1807292043Srrs/*12*/ { "splitstore", "thresh >= .01", 1808292043Srrs "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", 1809292043Srrs splitstore, 2 }, 1810292043Srrs/*13*/ { "aliasing_4k", "thresh >= .1", 1811292043Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1812292043Srrs aliasing, 2 }, 1813292043Srrs/*14*/ { "dtlbmissload", "thresh >= .1", 1814292043Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1815292043Srrs dtlb_missload, 3 }, 1816292043Srrs/*15*/ { "br_miss", "thresh >= .2", 1817292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", 1818292043Srrs br_mispredict, 2 }, 1819292043Srrs/*16*/ { "clears", "thresh >= .02", 1820292043Srrs "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1821292043Srrs clears, 4 }, 1822292043Srrs/*17*/ { "microassist", "thresh >= .05", 1823292043Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1824292043Srrs microassist, 2 }, 1825292043Srrs/*18*/ { "fpassist", "look for a excessive value", 1826292043Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1827292043Srrs fpassists, 2 }, 1828292043Srrs/*19*/ { "otherassistavx", "look for a excessive value", 1829292043Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1830292043Srrs otherassistavx, 2 }, 1831292043Srrs/*20*/ { "otherassistsse", "look for a excessive value", 1832292043Srrs "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1833292043Srrs otherassistsse, 2 }, 1834277177Srrs}; 1835277177Srrs 1836277177Srrs 1837277177Srrsstatic void 1838292027Srrsexplain_name_broad(const char *name) 1839292027Srrs{ 1840292027Srrs const char *mythresh; 1841292027Srrs if (strcmp(name, "eff1") == 0) { 1842292027Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); 1843292027Srrs mythresh = "thresh < .75"; 1844292027Srrs } else if (strcmp(name, "eff2") == 0) { 1845292027Srrs printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); 1846292027Srrs mythresh = "thresh > 1.0"; 1847292027Srrs } else if (strcmp(name, "itlbmiss") == 0) { 1848292027Srrs printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1849292043Srrs mythresh = "thresh > .05"; 1850292027Srrs } else if (strcmp(name, "icachemiss") == 0) { 1851292043Srrs printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); 1852292027Srrs mythresh = "thresh > .05"; 1853292027Srrs } else if (strcmp(name, "lcpstall") == 0) { 1854292027Srrs printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); 1855292027Srrs mythresh = "thresh > .05"; 1856292027Srrs } else if (strcmp(name, "cache1") == 0) { 1857292027Srrs printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); 1858292027Srrs mythresh = "thresh >= .1"; 1859292027Srrs } else if (strcmp(name, "cache2") == 0) { 1860292027Srrs printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); 1861292027Srrs mythresh = "thresh >= .2"; 1862292027Srrs } else if (strcmp(name, "contested") == 0) { 1863292027Srrs printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); 1864292027Srrs mythresh = "thresh >= .05"; 1865292027Srrs } else if (strcmp(name, "datashare") == 0) { 1866292027Srrs printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); 1867292027Srrs mythresh = "thresh > .05"; 1868292027Srrs } else if (strcmp(name, "blockstorefwd") == 0) { 1869292027Srrs printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); 1870292027Srrs mythresh = "thresh >= .05"; 1871292027Srrs } else if (strcmp(name, "aliasing_4k") == 0) { 1872292027Srrs printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); 1873292027Srrs mythresh = "thresh >= .1"; 1874292027Srrs } else if (strcmp(name, "dtlbmissload") == 0) { 1875292027Srrs printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); 1876292027Srrs printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); 1877292027Srrs mythresh = "thresh >= .1"; 1878292027Srrs 1879292027Srrs } else if (strcmp(name, "br_miss") == 0) { 1880292027Srrs printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); 1881292027Srrs printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); 1882292027Srrs printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); 1883292027Srrs mythresh = "thresh >= .2"; 1884292027Srrs } else if (strcmp(name, "clears") == 0) { 1885292027Srrs printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); 1886292027Srrs printf(" MACHINE_CLEARS.SMC + \n"); 1887292027Srrs printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); 1888292027Srrs mythresh = "thresh >= .02"; 1889292027Srrs } else if (strcmp(name, "fpassist") == 0) { 1890292027Srrs printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); 1891292027Srrs mythresh = "look for a excessive value"; 1892292027Srrs } else if (strcmp(name, "otherassistavx") == 0) { 1893292027Srrs printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); 1894292027Srrs mythresh = "look for a excessive value"; 1895292027Srrs } else if (strcmp(name, "microassist") == 0) { 1896292027Srrs printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); 1897292027Srrs printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); 1898292027Srrs mythresh = "thresh >= .05"; 1899292027Srrs } else { 1900292027Srrs printf("Unknown name:%s\n", name); 1901292027Srrs mythresh = "unknown entry"; 1902292043Srrs } 1903292027Srrs printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); 1904292027Srrs} 1905292027Srrs 1906292027Srrs 1907292027Srrs#define BROADWELL_COUNT 17 1908292027Srrsstatic struct cpu_entry broadwell[BROADWELL_COUNT] = { 1909292043Srrs/*1*/ { "eff1", "thresh < .75", 1910292043Srrs "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1911292043Srrs efficiency1, 2 }, 1912292043Srrs/*2*/ { "eff2", "thresh > 1.0", 1913292043Srrs "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1914292043Srrs efficiency2, 2 }, 1915292043Srrs/*3*/ { "itlbmiss", "thresh > .05", 1916292043Srrs "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", 1917292043Srrs itlb_miss_broad, 3 }, 1918292043Srrs/*4*/ { "icachemiss", "thresh > .05", 1919292043Srrs "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1920292043Srrs icache_miss_has, 2 }, 1921292043Srrs/*5*/ { "lcpstall", "thresh > .05", 1922292043Srrs "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1923292043Srrs lcp_stall, 2 }, 1924292043Srrs/*6*/ { "cache1", "thresh >= .1", 1925292043Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1926292043Srrs cache1broad, 2 }, 1927292043Srrs/*7*/ { "cache2", "thresh >= .2", 1928292043Srrs "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1929292043Srrs cache2broad, 2 }, 1930292043Srrs/*8*/ { "contested", "thresh >= .05", 1931292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", 1932292043Srrs contestedbroad, 2 }, 1933292043Srrs/*9*/ { "datashare", "thresh >= .05", 1934292043Srrs "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1935292043Srrs datasharing_has, 2 }, 1936292043Srrs/*10*/ { "blockstorefwd", "thresh >= .05", 1937292043Srrs "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1938292043Srrs blockstoreforward, 2 }, 1939292043Srrs/*11*/ { "aliasing_4k", "thresh >= .1", 1940292043Srrs "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1941292043Srrs aliasing_broad, 2 }, 1942292043Srrs/*12*/ { "dtlbmissload", "thresh >= .1", 1943292043Srrs "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1944292043Srrs dtlb_missload, 3 }, 1945292043Srrs/*13*/ { "br_miss", "thresh >= .2", 1946292043Srrs "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", 1947292043Srrs br_mispredict_broad, 7 }, 1948292043Srrs/*14*/ { "clears", "thresh >= .02", 1949292043Srrs "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1950292043Srrs clears_broad, 5 }, 1951292043Srrs/*15*/ { "fpassist", "look for a excessive value", 1952292043Srrs "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", 1953292043Srrs fpassists, 2 }, 1954292043Srrs/*16*/ { "otherassistavx", "look for a excessive value", 1955292043Srrs "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", 1956292043Srrs otherassistavx, 2 }, 1957292043Srrs/*17*/ { "microassist", "thresh >= .2", 1958292043Srrs "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", 1959292043Srrs microassist_broad, 4 }, 1960292027Srrs}; 1961292027Srrs 1962292043Srrs 1963292027Srrsstatic void 1964277177Srrsset_sandybridge(void) 1965277177Srrs{ 1966277177Srrs strcpy(the_cpu.cputype, "SandyBridge PMC"); 1967277177Srrs the_cpu.number = SANDY_BRIDGE_COUNT; 1968277177Srrs the_cpu.ents = sandy_bridge; 1969277177Srrs the_cpu.explain = explain_name_sb; 1970277177Srrs} 1971277177Srrs 1972277177Srrsstatic void 1973277177Srrsset_ivybridge(void) 1974277177Srrs{ 1975277177Srrs strcpy(the_cpu.cputype, "IvyBridge PMC"); 1976277177Srrs the_cpu.number = IVY_BRIDGE_COUNT; 1977277177Srrs the_cpu.ents = ivy_bridge; 1978277177Srrs the_cpu.explain = explain_name_ib; 1979277177Srrs} 1980277177Srrs 1981277177Srrs 1982277177Srrsstatic void 1983277177Srrsset_haswell(void) 1984277177Srrs{ 1985277177Srrs strcpy(the_cpu.cputype, "HASWELL PMC"); 1986277177Srrs the_cpu.number = HASWELL_COUNT; 1987277177Srrs the_cpu.ents = haswell; 1988277177Srrs the_cpu.explain = explain_name_has; 1989277177Srrs} 1990277177Srrs 1991292043Srrs 1992277177Srrsstatic void 1993292027Srrsset_broadwell(void) 1994292027Srrs{ 1995292027Srrs strcpy(the_cpu.cputype, "HASWELL PMC"); 1996292027Srrs the_cpu.number = BROADWELL_COUNT; 1997292027Srrs the_cpu.ents = broadwell; 1998292027Srrs the_cpu.explain = explain_name_broad; 1999292027Srrs} 2000292027Srrs 2001292043Srrs 2002292043Srrsstatic int 2003292043Srrsset_expression(const char *name) 2004277177Srrs{ 2005277177Srrs int found = 0, i; 2006292043Srrs for(i=0 ; i< the_cpu.number; i++) { 2007277177Srrs if (strcmp(name, the_cpu.ents[i].name) == 0) { 2008277177Srrs found = 1; 2009277177Srrs expression = the_cpu.ents[i].func; 2010277177Srrs command = the_cpu.ents[i].command; 2011277177Srrs threshold = the_cpu.ents[i].thresh; 2012292043Srrs if (the_cpu.ents[i].counters_required > max_pmc_counters) { 2013292043Srrs printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", 2014292043Srrs the_cpu.ents[i].name, 2015292043Srrs the_cpu.ents[i].counters_required, max_pmc_counters); 2016292043Srrs printf("Sorry this test can not be run\n"); 2017292043Srrs if (run_all == 0) { 2018292043Srrs exit(-1); 2019292043Srrs } else { 2020292043Srrs return(-1); 2021292043Srrs } 2022292043Srrs } 2023277177Srrs break; 2024277177Srrs } 2025277177Srrs } 2026277177Srrs if (!found) { 2027277177Srrs printf("For CPU type %s we have no expression:%s\n", 2028292043Srrs the_cpu.cputype, name); 2029277177Srrs exit(-1); 2030277177Srrs } 2031292043Srrs return(0); 2032277177Srrs} 2033277177Srrs 2034292043Srrs 2035292043Srrs 2036292043Srrs 2037292043Srrs 2038277177Srrsstatic int 2039292043Srrsvalidate_expression(char *name) 2040277177Srrs{ 2041277177Srrs int i, found; 2042277177Srrs 2043277177Srrs found = 0; 2044292043Srrs for(i=0 ; i< the_cpu.number; i++) { 2045277177Srrs if (strcmp(name, the_cpu.ents[i].name) == 0) { 2046277177Srrs found = 1; 2047277177Srrs break; 2048277177Srrs } 2049277177Srrs } 2050277177Srrs if (!found) { 2051292043Srrs return(-1); 2052277177Srrs } 2053277177Srrs return (0); 2054277177Srrs} 2055277177Srrs 2056277177Srrsstatic void 2057277177Srrsdo_expression(struct counters *cpu, int pos) 2058277177Srrs{ 2059292043Srrs if (expression == NULL) 2060277177Srrs return; 2061292043Srrs (*expression)(cpu, pos); 2062277177Srrs} 2063277177Srrs 2064277177Srrsstatic void 2065277177Srrsprocess_header(int idx, char *p) 2066277177Srrs{ 2067277177Srrs struct counters *up; 2068277177Srrs int i, len, nlen; 2069292043Srrs /* 2070292043Srrs * Given header element idx, at p in 2071292043Srrs * form 's/NN/nameof' 2072292043Srrs * process the entry to pull out the name and 2073292043Srrs * the CPU number. 2074277177Srrs */ 2075277177Srrs if (strncmp(p, "s/", 2)) { 2076277177Srrs printf("Check -- invalid header no s/ in %s\n", 2077292043Srrs p); 2078277177Srrs return; 2079277177Srrs } 2080277177Srrs up = &cnts[idx]; 2081277177Srrs up->cpu = strtol(&p[2], NULL, 10); 2082277177Srrs len = strlen(p); 2083292043Srrs for (i=2; i<len; i++) { 2084277177Srrs if (p[i] == '/') { 2085292043Srrs nlen = strlen(&p[(i+1)]); 2086292043Srrs if (nlen < (MAX_NLEN-1)) { 2087292043Srrs strcpy(up->counter_name, &p[(i+1)]); 2088277177Srrs } else { 2089292043Srrs strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); 2090277177Srrs } 2091277177Srrs } 2092277177Srrs } 2093277177Srrs} 2094277177Srrs 2095277177Srrsstatic void 2096292043Srrsbuild_counters_from_header(FILE *io) 2097277177Srrs{ 2098277177Srrs char buffer[8192], *p; 2099277177Srrs int i, len, cnt; 2100277177Srrs size_t mlen; 2101277177Srrs 2102292043Srrs /* We have a new start, lets 2103292043Srrs * setup our headers and cpus. 2104277177Srrs */ 2105277177Srrs if (fgets(buffer, sizeof(buffer), io) == NULL) { 2106277177Srrs printf("First line can't be read from file err:%d\n", errno); 2107277177Srrs return; 2108277177Srrs } 2109277177Srrs /* 2110292043Srrs * Ok output is an array of counters. Once 2111292043Srrs * we start to read the values in we must 2112292043Srrs * put them in there slot to match there CPU and 2113292043Srrs * counter being updated. We create a mass array 2114292043Srrs * of the counters, filling in the CPU and 2115292043Srrs * counter name. 2116277177Srrs */ 2117277177Srrs /* How many do we get? */ 2118277177Srrs len = strlen(buffer); 2119292043Srrs for (i=0, cnt=0; i<len; i++) { 2120277177Srrs if (strncmp(&buffer[i], "s/", 2) == 0) { 2121277177Srrs cnt++; 2122292043Srrs for(;i<len;i++) { 2123277177Srrs if (buffer[i] == ' ') 2124277177Srrs break; 2125277177Srrs } 2126277177Srrs } 2127277177Srrs } 2128277177Srrs mlen = sizeof(struct counters) * cnt; 2129277177Srrs cnts = malloc(mlen); 2130277177Srrs ncnts = cnt; 2131277177Srrs if (cnts == NULL) { 2132277177Srrs printf("No memory err:%d\n", errno); 2133277177Srrs return; 2134277177Srrs } 2135277177Srrs memset(cnts, 0, mlen); 2136292043Srrs for (i=0, cnt=0; i<len; i++) { 2137277177Srrs if (strncmp(&buffer[i], "s/", 2) == 0) { 2138277177Srrs p = &buffer[i]; 2139292043Srrs for(;i<len;i++) { 2140277177Srrs if (buffer[i] == ' ') { 2141277177Srrs buffer[i] = 0; 2142277177Srrs break; 2143277177Srrs } 2144277177Srrs } 2145277177Srrs process_header(cnt, p); 2146277177Srrs cnt++; 2147277177Srrs } 2148277177Srrs } 2149277177Srrs if (verbose) 2150292043Srrs printf("We have %d entries\n", cnt); 2151277177Srrs} 2152277177Srrsextern int max_to_collect; 2153277177Srrsint max_to_collect = MAX_COUNTER_SLOTS; 2154277177Srrs 2155277177Srrsstatic int 2156292043Srrsread_a_line(FILE *io) 2157277177Srrs{ 2158292043Srrs char buffer[8192], *p, *stop; 2159277177Srrs int pos, i; 2160277177Srrs 2161277177Srrs if (fgets(buffer, sizeof(buffer), io) == NULL) { 2162292043Srrs return(0); 2163277177Srrs } 2164277177Srrs p = buffer; 2165292043Srrs for (i=0; i<ncnts; i++) { 2166277177Srrs pos = cnts[i].pos; 2167277177Srrs cnts[i].vals[pos] = strtol(p, &stop, 0); 2168277177Srrs cnts[i].pos++; 2169277177Srrs cnts[i].sum += cnts[i].vals[pos]; 2170277177Srrs p = stop; 2171277177Srrs } 2172277177Srrs return (1); 2173277177Srrs} 2174277177Srrs 2175277177Srrsextern int cpu_count_out; 2176292043Srrsint cpu_count_out=0; 2177277177Srrs 2178277177Srrsstatic void 2179277177Srrsprint_header(void) 2180277177Srrs{ 2181277177Srrs int i, cnt, printed_cnt; 2182277177Srrs 2183277177Srrs printf("*********************************\n"); 2184292043Srrs for(i=0, cnt=0; i<MAX_CPU; i++) { 2185277177Srrs if (glob_cpu[i]) { 2186277177Srrs cnt++; 2187277177Srrs } 2188292043Srrs } 2189277177Srrs cpu_count_out = cnt; 2190292043Srrs for(i=0, printed_cnt=0; i<MAX_CPU; i++) { 2191277177Srrs if (glob_cpu[i]) { 2192277177Srrs printf("CPU%d", i); 2193277177Srrs printed_cnt++; 2194277177Srrs } 2195277177Srrs if (printed_cnt == cnt) { 2196277177Srrs printf("\n"); 2197277177Srrs break; 2198277177Srrs } else { 2199277177Srrs printf("\t"); 2200277177Srrs } 2201277177Srrs } 2202277177Srrs} 2203277177Srrs 2204277177Srrsstatic void 2205277177Srrslace_cpus_together(void) 2206277177Srrs{ 2207277177Srrs int i, j, lace_cpu; 2208277177Srrs struct counters *cpat, *at; 2209277177Srrs 2210292043Srrs for(i=0; i<ncnts; i++) { 2211277177Srrs cpat = &cnts[i]; 2212277177Srrs if (cpat->next_cpu) { 2213277177Srrs /* Already laced in */ 2214277177Srrs continue; 2215277177Srrs } 2216277177Srrs lace_cpu = cpat->cpu; 2217277177Srrs if (lace_cpu >= MAX_CPU) { 2218277177Srrs printf("CPU %d to big\n", lace_cpu); 2219277177Srrs continue; 2220277177Srrs } 2221277177Srrs if (glob_cpu[lace_cpu] == NULL) { 2222277177Srrs glob_cpu[lace_cpu] = cpat; 2223277177Srrs } else { 2224277177Srrs /* Already processed this cpu */ 2225277177Srrs continue; 2226277177Srrs } 2227277177Srrs /* Ok look forward for cpu->cpu and link in */ 2228292043Srrs for(j=(i+1); j<ncnts; j++) { 2229277177Srrs at = &cnts[j]; 2230277177Srrs if (at->next_cpu) { 2231277177Srrs continue; 2232277177Srrs } 2233277177Srrs if (at->cpu == lace_cpu) { 2234277177Srrs /* Found one */ 2235277177Srrs cpat->next_cpu = at; 2236277177Srrs cpat = at; 2237277177Srrs } 2238277177Srrs } 2239277177Srrs } 2240277177Srrs} 2241277177Srrs 2242277177Srrs 2243277177Srrsstatic void 2244277177Srrsprocess_file(char *filename) 2245277177Srrs{ 2246277177Srrs FILE *io; 2247277177Srrs int i; 2248277177Srrs int line_at, not_done; 2249292043Srrs pid_t pid_of_command=0; 2250277177Srrs 2251292043Srrs if (filename == NULL) { 2252277177Srrs io = my_popen(command, "r", &pid_of_command); 2253277177Srrs } else { 2254277177Srrs io = fopen(filename, "r"); 2255277177Srrs if (io == NULL) { 2256277177Srrs printf("Can't process file %s err:%d\n", 2257292043Srrs filename, errno); 2258277177Srrs return; 2259277177Srrs } 2260277177Srrs } 2261277177Srrs build_counters_from_header(io); 2262277177Srrs if (cnts == NULL) { 2263277177Srrs /* Nothing we can do */ 2264277177Srrs printf("Nothing to do -- no counters built\n"); 2265292043Srrs if (io) { 2266292027Srrs fclose(io); 2267277485Srrs } 2268277177Srrs return; 2269277177Srrs } 2270277177Srrs lace_cpus_together(); 2271277177Srrs print_header(); 2272277177Srrs if (verbose) { 2273292043Srrs for (i=0; i<ncnts; i++) { 2274277177Srrs printf("Counter:%s cpu:%d index:%d\n", 2275292043Srrs cnts[i].counter_name, 2276292043Srrs cnts[i].cpu, i); 2277277177Srrs } 2278277177Srrs } 2279277177Srrs line_at = 0; 2280277177Srrs not_done = 1; 2281292043Srrs while(not_done) { 2282277177Srrs if (read_a_line(io)) { 2283277177Srrs line_at++; 2284277177Srrs } else { 2285277177Srrs break; 2286277177Srrs } 2287277177Srrs if (line_at >= max_to_collect) { 2288277177Srrs not_done = 0; 2289277177Srrs } 2290277177Srrs if (filename == NULL) { 2291277177Srrs int cnt; 2292277177Srrs /* For the ones we dynamically open we print now */ 2293292043Srrs for(i=0, cnt=0; i<MAX_CPU; i++) { 2294292043Srrs do_expression(glob_cpu[i], (line_at-1)); 2295277177Srrs cnt++; 2296277177Srrs if (cnt == cpu_count_out) { 2297277177Srrs printf("\n"); 2298277177Srrs break; 2299277177Srrs } else { 2300277177Srrs printf("\t"); 2301277177Srrs } 2302277177Srrs } 2303277177Srrs } 2304277177Srrs } 2305277177Srrs if (filename) { 2306277177Srrs fclose(io); 2307277177Srrs } else { 2308277177Srrs my_pclose(io, pid_of_command); 2309277177Srrs } 2310277177Srrs} 2311277177Srrs#if defined(__amd64__) 2312277177Srrs#define cpuid(in,a,b,c,d)\ 2313277177Srrs asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); 2314292043Srrs 2315292043Srrsstatic __inline void 2316292043Srrsdo_cpuid(u_int ax, u_int cx, u_int *p) 2317292043Srrs{ 2318292043Srrs __asm __volatile("cpuid" 2319292043Srrs : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) 2320292043Srrs : "0" (ax), "c" (cx) ); 2321292043Srrs} 2322292043Srrs 2323277177Srrs#else 2324292043Srrs#define cpuid(in, a, b, c, d) 2325303675Sbdrewery#define do_cpuid(ax, cx, p) 2326277177Srrs#endif 2327277177Srrs 2328277177Srrsstatic void 2329277177Srrsget_cpuid_set(void) 2330277177Srrs{ 2331277177Srrs unsigned long eax, ebx, ecx, edx; 2332277177Srrs int model; 2333292043Srrs pid_t pid_of_command=0; 2334277177Srrs size_t sz, len; 2335277177Srrs FILE *io; 2336277177Srrs char linebuf[1024], *str; 2337292043Srrs u_int reg[4]; 2338277177Srrs 2339277177Srrs eax = ebx = ecx = edx = 0; 2340277177Srrs 2341277177Srrs cpuid(0, eax, ebx, ecx, edx); 2342277177Srrs if (ebx == 0x68747541) { 2343277177Srrs printf("AMD processors are not supported by this program\n"); 2344277177Srrs printf("Sorry\n"); 2345277177Srrs exit(0); 2346277177Srrs } else if (ebx == 0x6972794) { 2347277177Srrs printf("Cyrix processors are not supported by this program\n"); 2348277177Srrs printf("Sorry\n"); 2349277177Srrs exit(0); 2350277177Srrs } else if (ebx == 0x756e6547) { 2351277177Srrs printf("Genuine Intel\n"); 2352277177Srrs } else { 2353277177Srrs printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); 2354277177Srrs exit(0); 2355277177Srrs } 2356277177Srrs cpuid(1, eax, ebx, ecx, edx); 2357277177Srrs model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); 2358277177Srrs printf("CPU model is 0x%x id:0x%lx\n", model, eax); 2359277177Srrs switch (eax & 0xF00) { 2360277177Srrs case 0x500: /* Pentium family processors */ 2361277177Srrs printf("Intel Pentium P5\n"); 2362277177Srrs goto not_supported; 2363277177Srrs break; 2364277177Srrs case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ 2365277177Srrs switch (model) { 2366277177Srrs case 0x1: 2367277177Srrs printf("Intel Pentium P6\n"); 2368277177Srrs goto not_supported; 2369277177Srrs break; 2370292043Srrs case 0x3: 2371277177Srrs case 0x5: 2372277177Srrs printf("Intel PII\n"); 2373277177Srrs goto not_supported; 2374277177Srrs break; 2375292043Srrs case 0x6: case 0x16: 2376277177Srrs printf("Intel CL\n"); 2377277177Srrs goto not_supported; 2378277177Srrs break; 2379292043Srrs case 0x7: case 0x8: case 0xA: case 0xB: 2380277177Srrs printf("Intel PIII\n"); 2381277177Srrs goto not_supported; 2382277177Srrs break; 2383292043Srrs case 0x9: case 0xD: 2384277177Srrs printf("Intel PM\n"); 2385277177Srrs goto not_supported; 2386277177Srrs break; 2387277177Srrs case 0xE: 2388277177Srrs printf("Intel CORE\n"); 2389277177Srrs goto not_supported; 2390277177Srrs break; 2391277177Srrs case 0xF: 2392277177Srrs printf("Intel CORE2\n"); 2393277177Srrs goto not_supported; 2394277177Srrs break; 2395277177Srrs case 0x17: 2396277177Srrs printf("Intel CORE2EXTREME\n"); 2397277177Srrs goto not_supported; 2398277177Srrs break; 2399277177Srrs case 0x1C: /* Per Intel document 320047-002. */ 2400277177Srrs printf("Intel ATOM\n"); 2401277177Srrs goto not_supported; 2402277177Srrs break; 2403277177Srrs case 0x1A: 2404292043Srrs case 0x1E: /* 2405292043Srrs * Per Intel document 253669-032 9/2009, 2406292043Srrs * pages A-2 and A-57 2407292043Srrs */ 2408292043Srrs case 0x1F: /* 2409292043Srrs * Per Intel document 253669-032 9/2009, 2410292043Srrs * pages A-2 and A-57 2411292043Srrs */ 2412277177Srrs printf("Intel COREI7\n"); 2413277177Srrs goto not_supported; 2414277177Srrs break; 2415277177Srrs case 0x2E: 2416277177Srrs printf("Intel NEHALEM\n"); 2417277177Srrs goto not_supported; 2418277177Srrs break; 2419277177Srrs case 0x25: /* Per Intel document 253669-033US 12/2009. */ 2420277177Srrs case 0x2C: /* Per Intel document 253669-033US 12/2009. */ 2421277177Srrs printf("Intel WESTMERE\n"); 2422277177Srrs goto not_supported; 2423277177Srrs break; 2424277177Srrs case 0x2F: /* Westmere-EX, seen in wild */ 2425277177Srrs printf("Intel WESTMERE\n"); 2426277177Srrs goto not_supported; 2427277177Srrs break; 2428277177Srrs case 0x2A: /* Per Intel document 253669-039US 05/2011. */ 2429277177Srrs printf("Intel SANDYBRIDGE\n"); 2430277177Srrs set_sandybridge(); 2431277177Srrs break; 2432277177Srrs case 0x2D: /* Per Intel document 253669-044US 08/2012. */ 2433277177Srrs printf("Intel SANDYBRIDGE_XEON\n"); 2434277177Srrs set_sandybridge(); 2435277177Srrs break; 2436277177Srrs case 0x3A: /* Per Intel document 253669-043US 05/2012. */ 2437277177Srrs printf("Intel IVYBRIDGE\n"); 2438277177Srrs set_ivybridge(); 2439277177Srrs break; 2440277177Srrs case 0x3E: /* Per Intel document 325462-045US 01/2013. */ 2441277177Srrs printf("Intel IVYBRIDGE_XEON\n"); 2442277177Srrs set_ivybridge(); 2443277177Srrs break; 2444277177Srrs case 0x3F: /* Per Intel document 325462-045US 09/2014. */ 2445277177Srrs printf("Intel HASWELL (Xeon)\n"); 2446277177Srrs set_haswell(); 2447277177Srrs break; 2448277177Srrs case 0x3C: /* Per Intel document 325462-045US 01/2013. */ 2449277177Srrs case 0x45: 2450277177Srrs case 0x46: 2451277177Srrs printf("Intel HASWELL\n"); 2452277177Srrs set_haswell(); 2453277177Srrs break; 2454292027Srrs 2455292027Srrs case 0x4e: 2456292027Srrs case 0x5e: 2457292027Srrs printf("Intel SKY-LAKE\n"); 2458292027Srrs goto not_supported; 2459292027Srrs break; 2460292027Srrs case 0x3D: 2461292027Srrs case 0x47: 2462292027Srrs printf("Intel BROADWELL\n"); 2463292027Srrs set_broadwell(); 2464292027Srrs break; 2465292027Srrs case 0x4f: 2466292027Srrs case 0x56: 2467292043Srrs printf("Intel BROADWEL (Xeon)\n"); 2468292027Srrs set_broadwell(); 2469292027Srrs break; 2470292027Srrs 2471277177Srrs case 0x4D: 2472277177Srrs /* Per Intel document 330061-001 01/2014. */ 2473277177Srrs printf("Intel ATOM_SILVERMONT\n"); 2474277177Srrs goto not_supported; 2475277177Srrs break; 2476277177Srrs default: 2477277177Srrs printf("Intel model 0x%x is not known -- sorry\n", 2478292043Srrs model); 2479277177Srrs goto not_supported; 2480277177Srrs break; 2481277177Srrs } 2482277177Srrs break; 2483277177Srrs case 0xF00: /* P4 */ 2484277177Srrs printf("Intel unknown model %d\n", model); 2485277177Srrs goto not_supported; 2486277177Srrs break; 2487277177Srrs } 2488292043Srrs do_cpuid(0xa, 0, reg); 2489292043Srrs max_pmc_counters = (reg[3] & 0x0000000f) + 1; 2490292043Srrs printf("We have %d PMC counters to work with\n", max_pmc_counters); 2491277177Srrs /* Ok lets load the list of all known PMC's */ 2492277177Srrs io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); 2493277177Srrs if (valid_pmcs == NULL) { 2494277177Srrs /* Likely */ 2495277177Srrs pmc_allocated_cnt = PMC_INITIAL_ALLOC; 2496277177Srrs sz = sizeof(char *) * pmc_allocated_cnt; 2497277177Srrs valid_pmcs = malloc(sz); 2498277177Srrs if (valid_pmcs == NULL) { 2499292043Srrs printf("No memory allocation fails at startup?\n"); 2500277177Srrs exit(-1); 2501277177Srrs } 2502277177Srrs memset(valid_pmcs, 0, sz); 2503277177Srrs } 2504292043Srrs 2505277177Srrs while (fgets(linebuf, sizeof(linebuf), io) != NULL) { 2506277177Srrs if (linebuf[0] != '\t') { 2507277177Srrs /* sometimes headers ;-) */ 2508277177Srrs continue; 2509277177Srrs } 2510277177Srrs len = strlen(linebuf); 2511292043Srrs if (linebuf[(len-1)] == '\n') { 2512277177Srrs /* Likely */ 2513292043Srrs linebuf[(len-1)] = 0; 2514277177Srrs } 2515277177Srrs str = &linebuf[1]; 2516277177Srrs len = strlen(str) + 1; 2517277177Srrs valid_pmcs[valid_pmc_cnt] = malloc(len); 2518277177Srrs if (valid_pmcs[valid_pmc_cnt] == NULL) { 2519292043Srrs printf("No memory2 allocation fails at startup?\n"); 2520277177Srrs exit(-1); 2521277177Srrs } 2522277177Srrs memset(valid_pmcs[valid_pmc_cnt], 0, len); 2523277177Srrs strcpy(valid_pmcs[valid_pmc_cnt], str); 2524277177Srrs valid_pmc_cnt++; 2525277177Srrs if (valid_pmc_cnt >= pmc_allocated_cnt) { 2526277177Srrs /* Got to expand -- unlikely */ 2527277177Srrs char **more; 2528277177Srrs 2529277177Srrs sz = sizeof(char *) * (pmc_allocated_cnt * 2); 2530277177Srrs more = malloc(sz); 2531277177Srrs if (more == NULL) { 2532292043Srrs printf("No memory3 allocation fails at startup?\n"); 2533277177Srrs exit(-1); 2534277177Srrs } 2535277485Srrs memset(more, 0, sz); 2536277177Srrs memcpy(more, valid_pmcs, sz); 2537277177Srrs pmc_allocated_cnt *= 2; 2538277177Srrs free(valid_pmcs); 2539277177Srrs valid_pmcs = more; 2540277177Srrs } 2541277177Srrs } 2542292043Srrs my_pclose(io, pid_of_command); 2543277177Srrs return; 2544277177Srrsnot_supported: 2545292043Srrs printf("Not supported\n"); 2546277177Srrs exit(-1); 2547277177Srrs} 2548277177Srrs 2549277177Srrsstatic void 2550277177Srrsexplain_all(void) 2551277177Srrs{ 2552277177Srrs int i; 2553292043Srrs printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); 2554277177Srrs printf("-------------------------------------------------------------\n"); 2555292043Srrs for(i=0; i<the_cpu.number; i++){ 2556277177Srrs printf("For -e %s ", the_cpu.ents[i].name); 2557292043Srrs (*the_cpu.explain)(the_cpu.ents[i].name); 2558277177Srrs printf("----------------------------\n"); 2559277177Srrs } 2560277177Srrs} 2561277177Srrs 2562277177Srrsstatic void 2563277177Srrstest_for_a_pmc(const char *pmc, int out_so_far) 2564277177Srrs{ 2565277177Srrs FILE *io; 2566292043Srrs pid_t pid_of_command=0; 2567277177Srrs char my_command[1024]; 2568277177Srrs char line[1024]; 2569277177Srrs char resp[1024]; 2570277177Srrs int len, llen, i; 2571277177Srrs 2572277177Srrs if (out_so_far < 50) { 2573277177Srrs len = 50 - out_so_far; 2574292043Srrs for(i=0; i<len; i++) { 2575277177Srrs printf(" "); 2576277177Srrs } 2577277177Srrs } 2578277177Srrs sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); 2579292043Srrs io = my_popen(my_command, "r", &pid_of_command); 2580277177Srrs if (io == NULL) { 2581277177Srrs printf("Failed -- popen fails\n"); 2582277177Srrs return; 2583277177Srrs } 2584277177Srrs /* Setup what we expect */ 2585277177Srrs len = sprintf(resp, "%s", pmc); 2586277177Srrs if (fgets(line, sizeof(line), io) == NULL) { 2587277177Srrs printf("Failed -- no output from pmstat\n"); 2588277177Srrs goto out; 2589277177Srrs } 2590277177Srrs llen = strlen(line); 2591292043Srrs if (line[(llen-1)] == '\n') { 2592292043Srrs line[(llen-1)] = 0; 2593277177Srrs llen--; 2594277177Srrs } 2595292043Srrs for(i=2; i<(llen-len); i++) { 2596277177Srrs if (strncmp(&line[i], "ERROR", 5) == 0) { 2597277177Srrs printf("Failed %s\n", line); 2598277177Srrs goto out; 2599277177Srrs } else if (strncmp(&line[i], resp, len) == 0) { 2600277177Srrs int j, k; 2601277177Srrs 2602277177Srrs if (fgets(line, sizeof(line), io) == NULL) { 2603277177Srrs printf("Failed -- no second output from pmstat\n"); 2604277177Srrs goto out; 2605277177Srrs } 2606277177Srrs len = strlen(line); 2607292043Srrs for (j=0; j<len; j++) { 2608277177Srrs if (line[j] == ' ') { 2609292043Srrs j++; 2610277177Srrs } else { 2611277177Srrs break; 2612277177Srrs } 2613277177Srrs } 2614277177Srrs printf("Pass"); 2615277177Srrs len = strlen(&line[j]); 2616277177Srrs if (len < 20) { 2617292043Srrs for(k=0; k<(20-len); k++) { 2618277177Srrs printf(" "); 2619277177Srrs } 2620277177Srrs } 2621280697Srrs if (len) { 2622280697Srrs printf("%s", &line[j]); 2623280697Srrs } else { 2624280697Srrs printf("\n"); 2625280697Srrs } 2626277177Srrs goto out; 2627277177Srrs } 2628277177Srrs } 2629277177Srrs printf("Failed -- '%s' not '%s'\n", line, resp); 2630277177Srrsout: 2631292043Srrs my_pclose(io, pid_of_command); 2632292043Srrs 2633277177Srrs} 2634277177Srrs 2635277177Srrsstatic int 2636277177Srrsadd_it_to(char **vars, int cur_cnt, char *name) 2637277177Srrs{ 2638277177Srrs int i; 2639277177Srrs size_t len; 2640292043Srrs for(i=0; i<cur_cnt; i++) { 2641277177Srrs if (strcmp(vars[i], name) == 0) { 2642277177Srrs /* Already have */ 2643292043Srrs return(0); 2644277177Srrs } 2645277177Srrs } 2646277177Srrs if (vars[cur_cnt] != NULL) { 2647292043Srrs printf("Cur_cnt:%d filled with %s??\n", 2648292043Srrs cur_cnt, vars[cur_cnt]); 2649277177Srrs exit(-1); 2650277177Srrs } 2651277177Srrs /* Ok its new */ 2652277177Srrs len = strlen(name) + 1; 2653277177Srrs vars[cur_cnt] = malloc(len); 2654277177Srrs if (vars[cur_cnt] == NULL) { 2655277177Srrs printf("No memory %s\n", __FUNCTION__); 2656277177Srrs exit(-1); 2657277177Srrs } 2658277177Srrs memset(vars[cur_cnt], 0, len); 2659277177Srrs strcpy(vars[cur_cnt], name); 2660292043Srrs return(1); 2661277177Srrs} 2662277177Srrs 2663277177Srrsstatic char * 2664277177Srrsbuild_command_for_exp(struct expression *exp) 2665277177Srrs{ 2666277177Srrs /* 2667292043Srrs * Build the pmcstat command to handle 2668292043Srrs * the passed in expression. 2669292043Srrs * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ 2670292043Srrs * where NNN and QQQ represent the PMC's in the expression 2671292043Srrs * uniquely.. 2672277177Srrs */ 2673277177Srrs char forming[1024]; 2674277177Srrs int cnt_pmc, alloced_pmcs, i; 2675277177Srrs struct expression *at; 2676277177Srrs char **vars, *cmd; 2677277177Srrs size_t mal; 2678277177Srrs 2679277177Srrs alloced_pmcs = cnt_pmc = 0; 2680277177Srrs /* first how many do we have */ 2681277177Srrs at = exp; 2682277177Srrs while (at) { 2683277177Srrs if (at->type == TYPE_VALUE_PMC) { 2684277177Srrs cnt_pmc++; 2685277177Srrs } 2686277177Srrs at = at->next; 2687277177Srrs } 2688277177Srrs if (cnt_pmc == 0) { 2689277177Srrs printf("No PMC's in your expression -- nothing to do!!\n"); 2690277177Srrs exit(0); 2691277177Srrs } 2692277177Srrs mal = cnt_pmc * sizeof(char *); 2693277177Srrs vars = malloc(mal); 2694277177Srrs if (vars == NULL) { 2695277177Srrs printf("No memory\n"); 2696277177Srrs exit(-1); 2697277177Srrs } 2698277177Srrs memset(vars, 0, mal); 2699277177Srrs at = exp; 2700277177Srrs while (at) { 2701277177Srrs if (at->type == TYPE_VALUE_PMC) { 2702292043Srrs if(add_it_to(vars, alloced_pmcs, at->name)) { 2703277177Srrs alloced_pmcs++; 2704277177Srrs } 2705277177Srrs } 2706277177Srrs at = at->next; 2707277177Srrs } 2708277177Srrs /* Now we have a unique list in vars so create our command */ 2709292043Srrs mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ 2710292043Srrs for(i=0; i<alloced_pmcs; i++) { 2711277177Srrs mal += strlen(vars[i]) + 4; /* var + " -s " */ 2712277177Srrs } 2713292043Srrs cmd = malloc((mal+2)); 2714277177Srrs if (cmd == NULL) { 2715277177Srrs printf("%s out of mem\n", __FUNCTION__); 2716277177Srrs exit(-1); 2717277177Srrs } 2718292043Srrs memset(cmd, 0, (mal+2)); 2719277177Srrs strcpy(cmd, "/usr/sbin/pmcstat -w 1"); 2720277177Srrs at = exp; 2721292043Srrs for(i=0; i<alloced_pmcs; i++) { 2722277177Srrs sprintf(forming, " -s %s", vars[i]); 2723277177Srrs strcat(cmd, forming); 2724277177Srrs free(vars[i]); 2725277177Srrs vars[i] = NULL; 2726277177Srrs } 2727277177Srrs free(vars); 2728292043Srrs return(cmd); 2729277177Srrs} 2730277177Srrs 2731277177Srrsstatic int 2732277177Srrsuser_expr(struct counters *cpu, int pos) 2733277177Srrs{ 2734292043Srrs int ret; 2735277177Srrs double res; 2736277177Srrs struct counters *var; 2737277177Srrs struct expression *at; 2738277177Srrs 2739277177Srrs at = master_exp; 2740277177Srrs while (at) { 2741277177Srrs if (at->type == TYPE_VALUE_PMC) { 2742277177Srrs var = find_counter(cpu, at->name); 2743277177Srrs if (var == NULL) { 2744277177Srrs printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); 2745277177Srrs exit(-1); 2746277177Srrs } 2747277177Srrs if (pos != -1) { 2748277177Srrs at->value = var->vals[pos] * 1.0; 2749277177Srrs } else { 2750277177Srrs at->value = var->sum * 1.0; 2751277177Srrs } 2752277177Srrs } 2753277177Srrs at = at->next; 2754277177Srrs } 2755277177Srrs res = run_expr(master_exp, 1, NULL); 2756277177Srrs ret = printf("%1.3f", res); 2757292043Srrs return(ret); 2758277177Srrs} 2759277177Srrs 2760277177Srrs 2761277177Srrsstatic void 2762277177Srrsset_manual_exp(struct expression *exp) 2763277177Srrs{ 2764277177Srrs expression = user_expr; 2765277177Srrs command = build_command_for_exp(exp); 2766277177Srrs threshold = "User defined threshold"; 2767277177Srrs} 2768277177Srrs 2769277177Srrsstatic void 2770277177Srrsrun_tests(void) 2771277177Srrs{ 2772277177Srrs int i, lenout; 2773277177Srrs printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); 2774277177Srrs printf("------------------------------------------------------------------------\n"); 2775292043Srrs for(i=0; i<valid_pmc_cnt; i++) { 2776277177Srrs lenout = printf("%s", valid_pmcs[i]); 2777277177Srrs fflush(stdout); 2778277177Srrs test_for_a_pmc(valid_pmcs[i], lenout); 2779277177Srrs } 2780277177Srrs} 2781277177Srrsstatic void 2782277177Srrslist_all(void) 2783277177Srrs{ 2784277177Srrs int i, cnt, j; 2785277177Srrs printf("PMC Abbreviation\n"); 2786277177Srrs printf("--------------------------------------------------------------\n"); 2787292043Srrs for(i=0; i<valid_pmc_cnt; i++) { 2788277177Srrs cnt = printf("%s", valid_pmcs[i]); 2789292043Srrs for(j=cnt; j<52; j++) { 2790277177Srrs printf(" "); 2791277177Srrs } 2792277177Srrs printf("%%%d\n", i); 2793277177Srrs } 2794277177Srrs} 2795277177Srrs 2796277177Srrs 2797277177Srrsint 2798277177Srrsmain(int argc, char **argv) 2799277177Srrs{ 2800277177Srrs int i, j, cnt; 2801292043Srrs char *filename=NULL; 2802292043Srrs const char *name=NULL; 2803277177Srrs int help_only = 0; 2804277177Srrs int test_mode = 0; 2805292043Srrs int test_at = 0; 2806277177Srrs 2807277177Srrs get_cpuid_set(); 2808277177Srrs memset(glob_cpu, 0, sizeof(glob_cpu)); 2809292043Srrs while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) { 2810277177Srrs switch (i) { 2811292043Srrs case 'A': 2812292043Srrs run_all = 1; 2813292043Srrs break; 2814277177Srrs case 'L': 2815277177Srrs list_all(); 2816292043Srrs return(0); 2817277177Srrs case 'H': 2818277177Srrs printf("**********************************\n"); 2819277177Srrs explain_all(); 2820277177Srrs printf("**********************************\n"); 2821292043Srrs return(0); 2822277177Srrs break; 2823277177Srrs case 'T': 2824277177Srrs test_mode = 1; 2825277177Srrs break; 2826277177Srrs case 'E': 2827277177Srrs master_exp = parse_expression(optarg); 2828277177Srrs if (master_exp) { 2829277177Srrs set_manual_exp(master_exp); 2830277177Srrs } 2831277177Srrs break; 2832277177Srrs case 'e': 2833277177Srrs if (validate_expression(optarg)) { 2834277177Srrs printf("Unknown expression %s\n", optarg); 2835292043Srrs return(0); 2836277177Srrs } 2837277177Srrs name = optarg; 2838277177Srrs set_expression(optarg); 2839277177Srrs break; 2840277177Srrs case 'm': 2841277177Srrs max_to_collect = strtol(optarg, NULL, 0); 2842277177Srrs if (max_to_collect > MAX_COUNTER_SLOTS) { 2843277177Srrs /* You can't collect more than max in array */ 2844277177Srrs max_to_collect = MAX_COUNTER_SLOTS; 2845277177Srrs } 2846277177Srrs break; 2847277177Srrs case 'v': 2848277177Srrs verbose++; 2849277177Srrs break; 2850277177Srrs case 'h': 2851277177Srrs help_only = 1; 2852277177Srrs break; 2853277177Srrs case 'i': 2854277177Srrs filename = optarg; 2855277177Srrs break; 2856277177Srrs case '?': 2857277177Srrs default: 2858292043Srrs use: 2859277177Srrs printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", 2860292043Srrs argv[0]); 2861277177Srrs printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); 2862277177Srrs printf("-v -- verbose dump debug type things -- you don't want this\n"); 2863277177Srrs printf("-m N -- maximum to collect is N measurments\n"); 2864277177Srrs printf("-e expr-name -- Do expression expr-name\n"); 2865277177Srrs printf("-E 'your expression' -- Do your expression\n"); 2866277177Srrs printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); 2867277177Srrs printf("-H -- Don't run anything, just explain all canned expressions\n"); 2868277177Srrs printf("-T -- Test all PMC's defined by this processor\n"); 2869292043Srrs printf("-A -- Run all canned tests\n"); 2870292043Srrs return(0); 2871277177Srrs break; 2872298089Spfg } 2873277177Srrs } 2874292043Srrs if ((run_all == 0) && (name == NULL) && (filename == NULL) && 2875292043Srrs (test_mode == 0) && (master_exp == NULL)) { 2876277177Srrs printf("Without setting an expression we cannot dynamically gather information\n"); 2877277177Srrs printf("you must supply a filename (and you probably want verbosity)\n"); 2878277177Srrs goto use; 2879277177Srrs } 2880292043Srrs if (run_all && max_to_collect > 10) { 2881292043Srrs max_to_collect = 3; 2882292043Srrs } 2883277177Srrs if (test_mode) { 2884277177Srrs run_tests(); 2885292043Srrs return(0); 2886277177Srrs } 2887277177Srrs printf("*********************************\n"); 2888292043Srrs if ((master_exp == NULL) && name) { 2889292043Srrs (*the_cpu.explain)(name); 2890292043Srrs } else if (master_exp) { 2891277177Srrs printf("Examine your expression "); 2892277177Srrs print_exp(master_exp); 2893277177Srrs printf("User defined threshold\n"); 2894277177Srrs } 2895277177Srrs if (help_only) { 2896292043Srrs return(0); 2897277177Srrs } 2898292043Srrs if (run_all) { 2899292043Srrs more: 2900292043Srrs name = the_cpu.ents[test_at].name; 2901292043Srrs printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); 2902292043Srrs test_at++; 2903292043Srrs if (set_expression(name) == -1) { 2904292043Srrs if (test_at >= the_cpu.number) { 2905292043Srrs goto done; 2906292043Srrs } else 2907292043Srrs goto more; 2908292043Srrs } 2909292043Srrs 2910292043Srrs } 2911277177Srrs process_file(filename); 2912277177Srrs if (verbose >= 2) { 2913292043Srrs for (i=0; i<ncnts; i++) { 2914277177Srrs printf("Counter:%s cpu:%d index:%d\n", 2915292043Srrs cnts[i].counter_name, 2916292043Srrs cnts[i].cpu, i); 2917292043Srrs for(j=0; j<cnts[i].pos; j++) { 2918277177Srrs printf(" val - %ld\n", (long int)cnts[i].vals[j]); 2919277177Srrs } 2920277177Srrs printf(" sum - %ld\n", (long int)cnts[i].sum); 2921277177Srrs } 2922277177Srrs } 2923277177Srrs if (expression == NULL) { 2924292043Srrs return(0); 2925277177Srrs } 2926292043Srrs if (max_to_collect > 1) { 2927292043Srrs for(i=0, cnt=0; i<MAX_CPU; i++) { 2928292043Srrs if (glob_cpu[i]) { 2929292043Srrs do_expression(glob_cpu[i], -1); 2930292043Srrs cnt++; 2931292043Srrs if (cnt == cpu_count_out) { 2932292043Srrs printf("\n"); 2933292043Srrs break; 2934292043Srrs } else { 2935292043Srrs printf("\t"); 2936292043Srrs } 2937277177Srrs } 2938277177Srrs } 2939277177Srrs } 2940292043Srrs if (run_all && (test_at < the_cpu.number)) { 2941292043Srrs memset(glob_cpu, 0, sizeof(glob_cpu)); 2942292043Srrs ncnts = 0; 2943292043Srrs printf("*********************************\n"); 2944292043Srrs goto more; 2945292043Srrs } else if (run_all) { 2946292043Srrs done: 2947292043Srrs printf("*********************************\n"); 2948292043Srrs } 2949292043Srrs return(0); 2950277177Srrs} 2951