1277177Srrs/*-
2277177Srrs * Copyright (c) 2014, 2015 Netflix Inc.
3277177Srrs * All rights reserved.
4277177Srrs *
5277177Srrs * Redistribution and use in source and binary forms, with or without
6277177Srrs * modification, are permitted provided that the following conditions
7277177Srrs * are met:
8277177Srrs * 1. Redistributions of source code must retain the above copyright
9277177Srrs *    notice, this list of conditions and the following disclaimer,
10277177Srrs *    in this position and unchanged.
11277177Srrs * 2. Redistributions in binary form must reproduce the above copyright
12277177Srrs *    notice, this list of conditions and the following disclaimer in the
13277177Srrs *    documentation and/or other materials provided with the distribution.
14277177Srrs * 3. The name of the author may not be used to endorse or promote products
15277177Srrs *    derived from this software without specific prior written permission
16277177Srrs *
17277177Srrs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18277177Srrs * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19277177Srrs * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20277177Srrs * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21277177Srrs * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22277177Srrs * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23277177Srrs * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24277177Srrs * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25277177Srrs * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26277177Srrs * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27277177Srrs */
28277177Srrs#include <sys/types.h>
29277177Srrs#include <stdio.h>
30277177Srrs#include <stdlib.h>
31277177Srrs#include <unistd.h>
32277177Srrs#include <string.h>
33277177Srrs#include <strings.h>
34277177Srrs#include <sys/errno.h>
35277177Srrs#include <signal.h>
36277177Srrs#include <sys/wait.h>
37277177Srrs#include <getopt.h>
38277177Srrs#include "eval_expr.h"
39277177Srrs__FBSDID("$FreeBSD: releng/10.2/usr.sbin/pmcstudy/pmcstudy.c 285853 2015-07-24 19:37:30Z emaste $");
40277177Srrs
/*
 * Fixed table sizes and file-scope state.  Each non-static object below
 * is declared extern and then defined on the following line, so every
 * definition has a prior declaration.
 */
41277177Srrs#define MAX_COUNTER_SLOTS 1024	/* elements in counters.vals[] */
42277177Srrs#define MAX_NLEN 64	/* bytes in counters.counter_name[] */
43277177Srrs#define MAX_CPU 64	/* elements in glob_cpu[] */
44277177Srrsstatic int verbose = 0;	/* NOTE(review): presumably a verbosity level; not used in the visible part of the file */
45277177Srrs
46277177Srrsextern char **environ;	/* the process environment vector (POSIX environ) */
47277177Srrsextern struct expression *master_exp;
48277177Srrsstruct expression *master_exp=NULL;	/* NOTE(review): struct expression presumably comes from eval_expr.h -- confirm */
49277177Srrs
50277177Srrs#define PMC_INITIAL_ALLOC 512	/* NOTE(review): looks like the initial size of valid_pmcs -- confirm at the allocation site */
51277177Srrsextern char **valid_pmcs;
52277177Srrschar **valid_pmcs = NULL;	/* NOTE(review): presumably a list of PMC names; filled in outside this view */
53277177Srrsextern int valid_pmc_cnt;
54277177Srrsint valid_pmc_cnt=0;	/* NOTE(review): presumably entries in use in valid_pmcs -- confirm */
55277177Srrsextern int pmc_allocated_cnt;
56277177Srrsint pmc_allocated_cnt=0;	/* NOTE(review): presumably slots allocated for valid_pmcs -- confirm */
57277177Srrs
/*
 * my_popen --
 *	A variant of popen(3) that also reports the child's PID, so the
 *	caller can hand it to my_pclose() and have the child sent a SIGTERM.
 *
 *	command	shell command line; run as "/bin/sh -c command" with the
 *		caller's environment.  Any length is accepted.
 *	dir	"r" to read what the child writes to stdout and stderr,
 *		"w" to write to the child's stdin.
 *	p_pid	out: PID of the forked child.  Written once fork() has
 *		succeeded, even if the stream cannot be created afterwards.
 *
 *	Returns the stdio stream for the requested direction, or NULL with
 *	errno set when dir is invalid or pipe(), fork() or fdopen() fails.
 *
 *	Both pipes are always created and attached to the child; the parent
 *	keeps one end only.  A child of an "r" stream therefore sees EOF on
 *	stdin, and the output of a child of a "w" stream has no reader.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int reading, keep_fd, saved_errno;
	pid_t pid;

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		/* close() may clobber errno; report the pipe() failure. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close the parent's sides of both pipes. */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* stdin comes from pdesin; stdout and stderr go to pdesout. */
		if (dup2(pdesin[0], STDIN_FILENO) == -1 ||
		    dup2(pdesout[1], STDOUT_FILENO) == -1 ||
		    dup2(pdesout[1], STDERR_FILENO) == -1)
			_exit(127);
		/* Drop the originals unless they already are a std descriptor. */
		if (pdesin[0] > STDERR_FILENO)
			(void)close(pdesin[0]);
		if (pdesout[1] > STDERR_FILENO)
			(void)close(pdesout[1]);
		/*
		 * Hand the command to the shell directly instead of copying
		 * it into a fixed-size buffer first.
		 */
		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
		/* _exit: do not flush stdio buffers inherited from the parent. */
		_exit(127);
		/* NOTREACHED */
	default:			/* Parent. */
		break;
	}

	/* Store the pid */
	*p_pid = pid;
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
	}
	io = fdopen(keep_fd, reading ? "r" : "w");
	if (io == NULL) {
		/* Do not leak the descriptor when no stream could wrap it. */
		saved_errno = errno;
		(void)close(keep_fd);
		errno = saved_errno;
	}
	return (io);
}
142277177Srrs
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream, send the child a
 *	SIGTERM in case it is still running, then wait for it to be reaped.
 *
 *	io	stream returned by my_popen(); NULL is tolerated.
 *	the_pid	child PID reported by my_popen(); values <= 0 are ignored.
 *
 *	Unlike pclose(3) nothing is returned; the child's exit status is
 *	discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	pid_t pid;

	if (io != NULL)
		(void)fclose(io);
	/*
	 * kill(2) treats 0 and negative pids as "a process group" or
	 * "everything we may signal"; never pass those on.
	 */
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	/* Retry when the wait is interrupted by a signal. */
	do {
		pid = waitpid(the_pid, NULL, 0);
	} while (pid == -1 && errno == EINTR);
}
164277177Srrs
/*
 * One named counter and the values recorded for it.  Entries are chained
 * through next_cpu; find_counter() walks that chain comparing
 * counter_name.  The metric functions read vals[pos] for a single sample
 * and sum when they are called with pos == -1.
 */
165277177Srrsstruct counters {
166277177Srrs	struct counters *next_cpu;		/* Next entry in the chain, NULL at the end */
167277177Srrs	char counter_name[MAX_NLEN];		/* Name of counter */
168277177Srrs	int cpu;				/* CPU we are on */
169277177Srrs	int pos;				/* Index we are filling to. */
170277177Srrs	uint64_t vals[MAX_COUNTER_SLOTS];	/* Recorded samples, MAX_COUNTER_SLOTS of them */
171277177Srrs	uint64_t sum;				/* Summary of entries */
172277177Srrs};
173277177Srrs
/*
 * File-scope counter state.  Each non-static object is declared extern
 * and then defined on the following line.
 */
174277177Srrsextern struct counters *glob_cpu[MAX_CPU];
175277177Srrsstruct counters *glob_cpu[MAX_CPU];	/* NOTE(review): presumably one counter list per CPU -- confirm where it is filled */
176277177Srrs
177277177Srrsextern struct counters *cnts;
178277177Srrsstruct counters *cnts=NULL;	/* NOTE(review): looks like the counter storage; allocated outside this view */
179277177Srrs
180277177Srrsextern int ncnts;
181277177Srrsint ncnts=0;	/* NOTE(review): presumably the number of entries behind cnts -- confirm */
182277177Srrs
/* Same signature as the metric functions in this file, e.g. allocstall1(). */
183277177Srrsextern int (*expression)(struct counters *, int);
184277177Srrsint (*expression)(struct counters *, int);
185277177Srrs
186277177Srrsstatic const char *threshold=NULL;	/* NOTE(review): presumably the threshold text of the selected entry -- confirm */
187277177Srrsstatic const char *command;	/* NOTE(review): presumably the command line of the selected entry -- confirm */
188277177Srrs
/*
 * One selectable expression.  func has the signature shared by the metric
 * functions in this file (counter list, sample index).
 */
189277177Srrsstruct cpu_entry {
190277177Srrs	const char *name;	/* NOTE(review): presumably the name given to the explain_name_*() functions -- confirm */
191277177Srrs	const char *thresh;	/* NOTE(review): presumably the threshold text for this entry */
192277177Srrs	const char *command;	/* NOTE(review): presumably the pmcstat command that collects the counters -- confirm */
193277177Srrs	int (*func)(struct counters *, int);	/* computes and prints the value */
194277177Srrs};
195277177Srrs
196277177Srrs
/*
 * A table of entries together with an explain callback; explain has the
 * signature of explain_name_sb(), explain_name_ib() and explain_name_has().
 */
197277177Srrsstruct cpu_type {
198277177Srrs	char cputype[32];	/* NOTE(review): presumably the CPU type name -- confirm */
199277177Srrs	int number;	/* NOTE(review): presumably the number of elements in ents -- confirm */
200277177Srrs	struct cpu_entry *ents;
201277177Srrs	void (*explain)(const char *name);
202277177Srrs};
203277177Srrsextern struct cpu_type the_cpu;
204277177Srrsstruct cpu_type the_cpu;
205277177Srrs
/*
 * explain_name_sb --
 *	Print on stdout the formula behind the expression called name in the
 *	"_sb" set (presumably Sandy Bridge -- confirm), followed by a hint
 *	naming the threshold at which the value deserves attention.
 *
 *	name	expression name; matched exactly against the table below.
 *
 *	An unrecognised name is reported as "Unknown name:<name>" and the
 *	closing hint is still printed with "unknown entry" as the threshold.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* expression name */
		const char *text;	/* formula, printed verbatim */
		const char *thresh;	/* threshold used in the closing hint */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this text. */
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284277177Srrs
/*
 * explain_name_ib --
 *	Print on stdout the formula behind the expression called name in the
 *	"_ib" set (presumably Ivy Bridge -- confirm), followed by a hint
 *	naming the threshold at which the value deserves attention.
 *
 *	name	expression name; matched exactly against the table below.
 *
 *	An unrecognised name is reported as "Unknown name:<name>" and the
 *	closing hint is still printed with "unknown entry" as the threshold.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* expression name */
		const char *text;	/* formula, printed verbatim */
		const char *thresh;	/* threshold used in the closing hint */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366277177Srrs
367277177Srrs
/*
 * explain_name_has --
 *	Print on stdout the formula behind the expression called name in the
 *	"_has" set (presumably Haswell -- confirm), followed by a hint
 *	naming the threshold at which the value deserves attention.
 *
 *	name	expression name; matched exactly against the table below.
 *
 *	An unrecognised name is reported as "Unknown name:<name>" and the
 *	closing hint is still printed with "unknown entry" as the threshold.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* expression name */
		const char *text;	/* formula, printed verbatim */
		const char *thresh;	/* threshold used in the closing hint */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445277177Srrs
446277177Srrs
447277177Srrsstatic struct counters *
448277177Srrsfind_counter(struct counters *base, const char *name)
449277177Srrs{
450277177Srrs	struct counters *at;
451277177Srrs	int len;
452277177Srrs
453277177Srrs	at = base;
454277177Srrs	len = strlen(name);
455277177Srrs	while(at) {
456277177Srrs		if (strncmp(at->counter_name, name, len) == 0) {
457277177Srrs			return(at);
458277177Srrs		}
459277177Srrs		at = at->next_cpu;
460277177Srrs	}
461277177Srrs	printf("Can't find counter %s\n", name);
462277177Srrs	printf("We have:\n");
463277177Srrs	at = base;
464277177Srrs	while(at) {
465277177Srrs		printf("- %s\n", at->counter_name);
466277177Srrs		at = at->next_cpu;
467277177Srrs	}
468277177Srrs	exit(-1);
469277177Srrs}
470277177Srrs
471277177Srrsstatic int
472277177Srrsallocstall1(struct counters *cpu, int pos)
473277177Srrs{
474277177Srrs/*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
475277177Srrs	int ret;
476277177Srrs	struct counters *partial;
477277177Srrs	struct counters *unhalt;
478277177Srrs	double un, par, res;
479277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
480277177Srrs	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
481277177Srrs	if (pos != -1) {
482277177Srrs		par = partial->vals[pos] * 1.0;
483277177Srrs		un = unhalt->vals[pos] * 1.0;
484277177Srrs	} else {
485277177Srrs		par = partial->sum * 1.0;
486277177Srrs		un = unhalt->sum * 1.0;
487277177Srrs	}
488277177Srrs	res = par/un;
489277177Srrs	ret = printf("%1.3f", res);
490277177Srrs	return(ret);
491277177Srrs}
492277177Srrs
493277177Srrsstatic int
494277177Srrsallocstall2(struct counters *cpu, int pos)
495277177Srrs{
496277177Srrs/*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
497277177Srrs	int ret;
498277177Srrs	struct counters *partial;
499277177Srrs	struct counters *unhalt;
500277177Srrs	double un, par, res;
501277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
502277177Srrs	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
503277177Srrs	if (pos != -1) {
504277177Srrs		par = partial->vals[pos] * 1.0;
505277177Srrs		un = unhalt->vals[pos] * 1.0;
506277177Srrs	} else {
507277177Srrs		par = partial->sum * 1.0;
508277177Srrs		un = unhalt->sum * 1.0;
509277177Srrs	}
510277177Srrs	res = par/un;
511277177Srrs	ret = printf("%1.3f", res);
512277177Srrs	return(ret);
513277177Srrs}
514277177Srrs
515277177Srrsstatic int
516277177Srrsbr_mispredict(struct counters *cpu, int pos)
517277177Srrs{
518277177Srrs	struct counters *brctr;
519277177Srrs	struct counters *unhalt;
520277177Srrs	int ret;
521277177Srrs/*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
522277177Srrs	double br, un, con, res;
523277177Srrs	con = 20.0;
524277177Srrs
525277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
526277177Srrs        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
527277177Srrs	if (pos != -1) {
528277177Srrs		br = brctr->vals[pos] * 1.0;
529277177Srrs		un = unhalt->vals[pos] * 1.0;
530277177Srrs	} else {
531277177Srrs		br = brctr->sum * 1.0;
532277177Srrs		un = unhalt->sum * 1.0;
533277177Srrs	}
534277177Srrs	res = (con * br)/un;
535277177Srrs 	ret = printf("%1.3f", res);
536277177Srrs	return(ret);
537277177Srrs}
538277177Srrs
539277177Srrsstatic int
540277177Srrsbr_mispredictib(struct counters *cpu, int pos)
541277177Srrs{
542277177Srrs	struct counters *brctr;
543277177Srrs	struct counters *unhalt;
544277177Srrs	struct counters *clear, *clear2, *clear3;
545277177Srrs	struct counters *uops;
546277177Srrs	struct counters *recv;
547277177Srrs	struct counters *iss;
548277177Srrs/*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
549277177Srrs	int ret;
550277177Srrs        /*
551277177Srrs	 * (BR_MISP_RETIRED.ALL_BRANCHES /
552277177Srrs	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
553277177Srrs	 *          MACHINE_CLEAR.COUNT) *
554277177Srrs	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
555277177Srrs	 *
556277177Srrs	 */
557277177Srrs	double br, cl, cl2, cl3, uo, re, un, con, res, is;
558277177Srrs	con = 4.0;
559277177Srrs
560277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
561277177Srrs        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
562277177Srrs	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
563277177Srrs	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
564277177Srrs	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
565277177Srrs	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
566277177Srrs	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
567277177Srrs	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
568277177Srrs	if (pos != -1) {
569277177Srrs		br = brctr->vals[pos] * 1.0;
570277177Srrs		cl = clear->vals[pos] * 1.0;
571277177Srrs		cl2 = clear2->vals[pos] * 1.0;
572277177Srrs		cl3 = clear3->vals[pos] * 1.0;
573277177Srrs		uo = uops->vals[pos] * 1.0;
574277177Srrs		re = recv->vals[pos] * 1.0;
575277177Srrs		is = iss->vals[pos] * 1.0;
576277177Srrs		un = unhalt->vals[pos] * 1.0;
577277177Srrs	} else {
578277177Srrs		br = brctr->sum * 1.0;
579277177Srrs		cl = clear->sum * 1.0;
580277177Srrs		cl2 = clear2->sum * 1.0;
581277177Srrs		cl3 = clear3->sum * 1.0;
582277177Srrs		uo = uops->sum * 1.0;
583277177Srrs		re = recv->sum * 1.0;
584277177Srrs		is = iss->sum * 1.0;
585277177Srrs		un = unhalt->sum * 1.0;
586277177Srrs	}
587277177Srrs	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
588277177Srrs 	ret = printf("%1.3f", res);
589277177Srrs	return(ret);
590277177Srrs}
591277177Srrs
592277177Srrsstatic int
593277177Srrssplitloadib(struct counters *cpu, int pos)
594277177Srrs{
595277177Srrs	int ret;
596277177Srrs	struct counters *mem;
597277177Srrs	struct counters *l1d, *ldblock;
598277177Srrs	struct counters *unhalt;
599277177Srrs	double un, memd, res, l1, ldb;
600277177Srrs        /*
601277177Srrs	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
602277177Srrs	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
603277177Srrs	 */
604277177Srrs
605277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
606277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
607277177Srrs	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
608277177Srrs	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
609277177Srrs	if (pos != -1) {
610277177Srrs		memd = mem->vals[pos] * 1.0;
611277177Srrs		l1 = l1d->vals[pos] * 1.0;
612277177Srrs		ldb = ldblock->vals[pos] * 1.0;
613277177Srrs		un = unhalt->vals[pos] * 1.0;
614277177Srrs	} else {
615277177Srrs		memd = mem->sum * 1.0;
616277177Srrs		l1 = l1d->sum * 1.0;
617277177Srrs		ldb = ldblock->sum * 1.0;
618277177Srrs		un = unhalt->sum * 1.0;
619277177Srrs	}
620277177Srrs	res = ((l1 / memd) * ldb)/un;
621277177Srrs	ret = printf("%1.3f", res);
622277177Srrs	return(ret);
623277177Srrs}
624277177Srrs
625277177Srrsstatic int
626277177Srrssplitload(struct counters *cpu, int pos)
627277177Srrs{
628277177Srrs	int ret;
629277177Srrs	struct counters *mem;
630277177Srrs	struct counters *unhalt;
631277177Srrs	double con, un, memd, res;
632277177Srrs/*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
633277177Srrs
634277177Srrs	con = 5.0;
635277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
636277177Srrs	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
637277177Srrs	if (pos != -1) {
638277177Srrs		memd = mem->vals[pos] * 1.0;
639277177Srrs		un = unhalt->vals[pos] * 1.0;
640277177Srrs	} else {
641277177Srrs		memd = mem->sum * 1.0;
642277177Srrs		un = unhalt->sum * 1.0;
643277177Srrs	}
644277177Srrs	res = (memd * con)/un;
645277177Srrs	ret = printf("%1.3f", res);
646277177Srrs	return(ret);
647277177Srrs}
648277177Srrs
649277177Srrsstatic int
650277177Srrssplitstore(struct counters *cpu, int pos)
651277177Srrs{
652277177Srrs        /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
653277177Srrs	int ret;
654277177Srrs	struct counters *mem_split;
655277177Srrs	struct counters *mem_stores;
656277177Srrs	double memsplit, memstore, res;
657277177Srrs	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
658277177Srrs	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
659277177Srrs	if (pos != -1) {
660277177Srrs		memsplit = mem_split->vals[pos] * 1.0;
661277177Srrs		memstore = mem_stores->vals[pos] * 1.0;
662277177Srrs	} else {
663277177Srrs		memsplit = mem_split->sum * 1.0;
664277177Srrs		memstore = mem_stores->sum * 1.0;
665277177Srrs	}
666277177Srrs	res = memsplit/memstore;
667277177Srrs	ret = printf("%1.3f", res);
668277177Srrs	return(ret);
669277177Srrs}
670277177Srrs
671277177Srrs
672277177Srrsstatic int
673277177Srrscontested(struct counters *cpu, int pos)
674277177Srrs{
675277177Srrs        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
676277177Srrs	int ret;
677277177Srrs	struct counters *mem;
678277177Srrs	struct counters *unhalt;
679277177Srrs	double con, un, memd, res;
680277177Srrs
681277177Srrs	con = 60.0;
682277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
684277177Srrs	if (pos != -1) {
685277177Srrs		memd = mem->vals[pos] * 1.0;
686277177Srrs		un = unhalt->vals[pos] * 1.0;
687277177Srrs	} else {
688277177Srrs		memd = mem->sum * 1.0;
689277177Srrs		un = unhalt->sum * 1.0;
690277177Srrs	}
691277177Srrs	res = (memd * con)/un;
692277177Srrs	ret = printf("%1.3f", res);
693277177Srrs	return(ret);
694277177Srrs}
695277177Srrs
696277177Srrsstatic int
697277177Srrscontested_has(struct counters *cpu, int pos)
698277177Srrs{
699277177Srrs        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
700277177Srrs	int ret;
701277177Srrs	struct counters *mem;
702277177Srrs	struct counters *unhalt;
703277177Srrs	double con, un, memd, res;
704277177Srrs
705277177Srrs	con = 84.0;
706277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
708277177Srrs	if (pos != -1) {
709277177Srrs		memd = mem->vals[pos] * 1.0;
710277177Srrs		un = unhalt->vals[pos] * 1.0;
711277177Srrs	} else {
712277177Srrs		memd = mem->sum * 1.0;
713277177Srrs		un = unhalt->sum * 1.0;
714277177Srrs	}
715277177Srrs	res = (memd * con)/un;
716277177Srrs	ret = printf("%1.3f", res);
717277177Srrs	return(ret);
718277177Srrs}
719277177Srrs
720277177Srrs
721277177Srrsstatic int
722277177Srrsblockstoreforward(struct counters *cpu, int pos)
723277177Srrs{
724277177Srrs        /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
725277177Srrs	int ret;
726277177Srrs	struct counters *ldb;
727277177Srrs	struct counters *unhalt;
728277177Srrs	double con, un, ld, res;
729277177Srrs
730277177Srrs	con = 13.0;
731277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
732277177Srrs	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
733277177Srrs	if (pos != -1) {
734277177Srrs		ld = ldb->vals[pos] * 1.0;
735277177Srrs		un = unhalt->vals[pos] * 1.0;
736277177Srrs	} else {
737277177Srrs		ld = ldb->sum * 1.0;
738277177Srrs		un = unhalt->sum * 1.0;
739277177Srrs	}
740277177Srrs	res = (ld * con)/un;
741277177Srrs	ret = printf("%1.3f", res);
742277177Srrs	return(ret);
743277177Srrs}
744277177Srrs
745277177Srrsstatic int
746277177Srrscache2(struct counters *cpu, int pos)
747277177Srrs{
748277177Srrs	/* ** Suspect ***
749277177Srrs	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
750277177Srrs	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
751277177Srrs	 */
752277177Srrs	int ret;
753277177Srrs	struct counters *mem1, *mem2, *mem3;
754277177Srrs	struct counters *unhalt;
755277177Srrs	double con1, con2, con3, un, me_1, me_2, me_3, res;
756277177Srrs
757277177Srrs	con1 = 26.0;
758277177Srrs	con2 = 43.0;
759277177Srrs	con3 = 60.0;
760277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
761277177Srrs/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
762277177Srrs	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
763277177Srrs	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
764277177Srrs	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
765277177Srrs	if (pos != -1) {
766277177Srrs		me_1 = mem1->vals[pos] * 1.0;
767277177Srrs		me_2 = mem2->vals[pos] * 1.0;
768277177Srrs		me_3 = mem3->vals[pos] * 1.0;
769277177Srrs		un = unhalt->vals[pos] * 1.0;
770277177Srrs	} else {
771277177Srrs		me_1 = mem1->sum * 1.0;
772277177Srrs		me_2 = mem2->sum * 1.0;
773277177Srrs		me_3 = mem3->sum * 1.0;
774277177Srrs		un = unhalt->sum * 1.0;
775277177Srrs	}
776277177Srrs	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
777277177Srrs	ret = printf("%1.3f", res);
778277177Srrs	return(ret);
779277177Srrs}
780277177Srrs
781277177Srrsstatic int
782277177Srrsdatasharing(struct counters *cpu, int pos)
783277177Srrs{
784277177Srrs	/*
785277177Srrs	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
786277177Srrs	 */
787277177Srrs	int ret;
788277177Srrs	struct counters *mem;
789277177Srrs	struct counters *unhalt;
790277177Srrs	double con, res, me, un;
791277177Srrs
792277177Srrs	con = 43.0;
793277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
794277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
795277177Srrs	if (pos != -1) {
796277177Srrs		me = mem->vals[pos] * 1.0;
797277177Srrs		un = unhalt->vals[pos] * 1.0;
798277177Srrs	} else {
799277177Srrs		me = mem->sum * 1.0;
800277177Srrs		un = unhalt->sum * 1.0;
801277177Srrs	}
802277177Srrs	res = (me * con)/un;
803277177Srrs	ret = printf("%1.3f", res);
804277177Srrs	return(ret);
805277177Srrs
806277177Srrs}
807277177Srrs
808277177Srrs
809277177Srrsstatic int
810277177Srrsdatasharing_has(struct counters *cpu, int pos)
811277177Srrs{
812277177Srrs	/*
813277177Srrs	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
814277177Srrs	 */
815277177Srrs	int ret;
816277177Srrs	struct counters *mem;
817277177Srrs	struct counters *unhalt;
818277177Srrs	double con, res, me, un;
819277177Srrs
820277177Srrs	con = 72.0;
821277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
822277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
823277177Srrs	if (pos != -1) {
824277177Srrs		me = mem->vals[pos] * 1.0;
825277177Srrs		un = unhalt->vals[pos] * 1.0;
826277177Srrs	} else {
827277177Srrs		me = mem->sum * 1.0;
828277177Srrs		un = unhalt->sum * 1.0;
829277177Srrs	}
830277177Srrs	res = (me * con)/un;
831277177Srrs	ret = printf("%1.3f", res);
832277177Srrs	return(ret);
833277177Srrs
834277177Srrs}
835277177Srrs
836277177Srrs
837277177Srrsstatic int
838277177Srrscache2ib(struct counters *cpu, int pos)
839277177Srrs{
840277177Srrs        /*
841277177Srrs	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
842277177Srrs	 */
843277177Srrs	int ret;
844277177Srrs	struct counters *mem;
845277177Srrs	struct counters *unhalt;
846277177Srrs	double con, un, me, res;
847277177Srrs
848277177Srrs	con = 29.0;
849277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
850277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
851277177Srrs	if (pos != -1) {
852277177Srrs		me = mem->vals[pos] * 1.0;
853277177Srrs		un = unhalt->vals[pos] * 1.0;
854277177Srrs	} else {
855277177Srrs		me = mem->sum * 1.0;
856277177Srrs		un = unhalt->sum * 1.0;
857277177Srrs	}
858277177Srrs	res = (con * me)/un;
859277177Srrs	ret = printf("%1.3f", res);
860277177Srrs	return(ret);
861277177Srrs}
862277177Srrs
863277177Srrsstatic int
864277177Srrscache2has(struct counters *cpu, int pos)
865277177Srrs{
866277177Srrs	/*
867277177Srrs	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
868277177Srrs	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
869277177Srrs	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
870277177Srrs	 *           / CPU_CLK_UNHALTED.THREAD_P
871277177Srrs	 */
872277177Srrs	int ret;
873277177Srrs	struct counters *mem1, *mem2, *mem3;
874277177Srrs	struct counters *unhalt;
875277177Srrs	double con1, con2, con3, un, me1, me2, me3, res;
876277177Srrs
877277177Srrs	con1 = 36.0;
878277177Srrs	con2 = 72.0;
879277177Srrs	con3 = 84.0;
880277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
881277177Srrs	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
882277177Srrs	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
883277177Srrs	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
884277177Srrs	if (pos != -1) {
885277177Srrs		me1 = mem1->vals[pos] * 1.0;
886277177Srrs		me2 = mem2->vals[pos] * 1.0;
887277177Srrs		me3 = mem3->vals[pos] * 1.0;
888277177Srrs		un = unhalt->vals[pos] * 1.0;
889277177Srrs	} else {
890277177Srrs		me1 = mem1->sum * 1.0;
891277177Srrs		me2 = mem2->sum * 1.0;
892277177Srrs		me3 = mem3->sum * 1.0;
893277177Srrs		un = unhalt->sum * 1.0;
894277177Srrs	}
895277177Srrs	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
896277177Srrs	ret = printf("%1.3f", res);
897277177Srrs	return(ret);
898277177Srrs}
899277177Srrs
900277177Srrsstatic int
901277177Srrscache1(struct counters *cpu, int pos)
902277177Srrs{
903277177Srrs	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
904277177Srrs	int ret;
905277177Srrs	struct counters *mem;
906277177Srrs	struct counters *unhalt;
907277177Srrs	double con, un, me, res;
908277177Srrs
909277177Srrs	con = 180.0;
910277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
911277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
912277177Srrs	if (pos != -1) {
913277177Srrs		me = mem->vals[pos] * 1.0;
914277177Srrs		un = unhalt->vals[pos] * 1.0;
915277177Srrs	} else {
916277177Srrs		me = mem->sum * 1.0;
917277177Srrs		un = unhalt->sum * 1.0;
918277177Srrs	}
919277177Srrs	res = (me * con)/un;
920277177Srrs	ret = printf("%1.3f", res);
921277177Srrs	return(ret);
922277177Srrs}
923277177Srrs
924277177Srrsstatic int
925277177Srrscache1ib(struct counters *cpu, int pos)
926277177Srrs{
927277177Srrs	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
928277177Srrs	int ret;
929277177Srrs	struct counters *mem;
930277177Srrs	struct counters *unhalt;
931277177Srrs	double con, un, me, res;
932277177Srrs
933277177Srrs	con = 180.0;
934277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
935277177Srrs	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
936277177Srrs	if (pos != -1) {
937277177Srrs		me = mem->vals[pos] * 1.0;
938277177Srrs		un = unhalt->vals[pos] * 1.0;
939277177Srrs	} else {
940277177Srrs		me = mem->sum * 1.0;
941277177Srrs		un = unhalt->sum * 1.0;
942277177Srrs	}
943277177Srrs	res = (me * con)/un;
944277177Srrs	ret = printf("%1.3f", res);
945277177Srrs	return(ret);
946277177Srrs}
947277177Srrs
948277177Srrs
949277177Srrsstatic int
950277177Srrsdtlb_missload(struct counters *cpu, int pos)
951277177Srrs{
952277177Srrs	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
953277177Srrs	int ret;
954277177Srrs	struct counters *dtlb_m, *dtlb_d;
955277177Srrs	struct counters *unhalt;
956277177Srrs	double con, un, d1, d2, res;
957277177Srrs
958277177Srrs	con = 7.0;
959277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
960277177Srrs	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
961277177Srrs	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
962277177Srrs	if (pos != -1) {
963277177Srrs		d1 = dtlb_m->vals[pos] * 1.0;
964277177Srrs		d2 = dtlb_d->vals[pos] * 1.0;
965277177Srrs		un = unhalt->vals[pos] * 1.0;
966277177Srrs	} else {
967277177Srrs		d1 = dtlb_m->sum * 1.0;
968277177Srrs		d2 = dtlb_d->sum * 1.0;
969277177Srrs		un = unhalt->sum * 1.0;
970277177Srrs	}
971277177Srrs	res = ((d1 * con) + d2)/un;
972277177Srrs	ret = printf("%1.3f", res);
973277177Srrs	return(ret);
974277177Srrs}
975277177Srrs
976277177Srrsstatic int
977277177Srrsdtlb_missstore(struct counters *cpu, int pos)
978277177Srrs{
979277177Srrs        /*
980277177Srrs	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
981277177Srrs	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
982277177Srrs	 */
983277177Srrs        int ret;
984277177Srrs        struct counters *dtsb_m, *dtsb_d;
985277177Srrs        struct counters *unhalt;
986277177Srrs        double con, un, d1, d2, res;
987277177Srrs
988277177Srrs        con = 7.0;
989277177Srrs        unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
990277177Srrs        dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
991277177Srrs        dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
992277177Srrs        if (pos != -1) {
993277177Srrs                d1 = dtsb_m->vals[pos] * 1.0;
994277177Srrs                d2 = dtsb_d->vals[pos] * 1.0;
995277177Srrs                un = unhalt->vals[pos] * 1.0;
996277177Srrs        } else {
997277177Srrs                d1 = dtsb_m->sum * 1.0;
998277177Srrs                d2 = dtsb_d->sum * 1.0;
999277177Srrs                un = unhalt->sum * 1.0;
1000277177Srrs        }
1001277177Srrs        res = ((d1 * con) + d2)/un;
1002277177Srrs        ret = printf("%1.3f", res);
1003277177Srrs        return(ret);
1004277177Srrs}
1005277177Srrs
1006277177Srrsstatic int
1007277177Srrsitlb_miss(struct counters *cpu, int pos)
1008277177Srrs{
1009277177Srrs	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1010277177Srrs	int ret;
1011277177Srrs	struct counters *itlb;
1012277177Srrs	struct counters *unhalt;
1013277177Srrs	double un, d1, res;
1014277177Srrs
1015277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1016277177Srrs	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1017277177Srrs	if (pos != -1) {
1018277177Srrs		d1 = itlb->vals[pos] * 1.0;
1019277177Srrs		un = unhalt->vals[pos] * 1.0;
1020277177Srrs	} else {
1021277177Srrs		d1 = itlb->sum * 1.0;
1022277177Srrs		un = unhalt->sum * 1.0;
1023277177Srrs	}
1024277177Srrs	res = d1/un;
1025277177Srrs	ret = printf("%1.3f", res);
1026277177Srrs	return(ret);
1027277177Srrs}
1028277177Srrs
1029277177Srrsstatic int
1030277177Srrsicache_miss(struct counters *cpu, int pos)
1031277177Srrs{
1032277177Srrs	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1033277177Srrs
1034277177Srrs	int ret;
1035277177Srrs	struct counters *itlb, *icache;
1036277177Srrs	struct counters *unhalt;
1037277177Srrs	double un, d1, ic, res;
1038277177Srrs
1039277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040277177Srrs	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1041277177Srrs	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1042277177Srrs	if (pos != -1) {
1043277177Srrs		d1 = itlb->vals[pos] * 1.0;
1044277177Srrs		ic = icache->vals[pos] * 1.0;
1045277177Srrs		un = unhalt->vals[pos] * 1.0;
1046277177Srrs	} else {
1047277177Srrs		d1 = itlb->sum * 1.0;
1048277177Srrs		ic = icache->sum * 1.0;
1049277177Srrs		un = unhalt->sum * 1.0;
1050277177Srrs	}
1051277177Srrs	res = (ic-d1)/un;
1052277177Srrs	ret = printf("%1.3f", res);
1053277177Srrs	return(ret);
1054277177Srrs
1055277177Srrs}
1056277177Srrs
1057277177Srrsstatic int
1058277177Srrsicache_miss_has(struct counters *cpu, int pos)
1059277177Srrs{
1060277177Srrs	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1061277177Srrs
1062277177Srrs	int ret;
1063277177Srrs	struct counters *icache;
1064277177Srrs	struct counters *unhalt;
1065277177Srrs	double un, con, ic, res;
1066277177Srrs
1067277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1068277177Srrs	icache = find_counter(cpu, "ICACHE.MISSES");
1069277177Srrs	con = 36.0;
1070277177Srrs	if (pos != -1) {
1071277177Srrs		ic = icache->vals[pos] * 1.0;
1072277177Srrs		un = unhalt->vals[pos] * 1.0;
1073277177Srrs	} else {
1074277177Srrs		ic = icache->sum * 1.0;
1075277177Srrs		un = unhalt->sum * 1.0;
1076277177Srrs	}
1077277177Srrs	res = (con * ic)/un;
1078277177Srrs	ret = printf("%1.3f", res);
1079277177Srrs	return(ret);
1080277177Srrs
1081277177Srrs}
1082277177Srrs
1083277177Srrsstatic int
1084277177Srrslcp_stall(struct counters *cpu, int pos)
1085277177Srrs{
1086277177Srrs         /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1087277177Srrs	int ret;
1088277177Srrs	struct counters *ild;
1089277177Srrs	struct counters *unhalt;
1090277177Srrs	double un, d1, res;
1091277177Srrs
1092277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1093277177Srrs	ild = find_counter(cpu, "ILD_STALL.LCP");
1094277177Srrs	if (pos != -1) {
1095277177Srrs		d1 = ild->vals[pos] * 1.0;
1096277177Srrs		un = unhalt->vals[pos] * 1.0;
1097277177Srrs	} else {
1098277177Srrs		d1 = ild->sum * 1.0;
1099277177Srrs		un = unhalt->sum * 1.0;
1100277177Srrs	}
1101277177Srrs	res = d1/un;
1102277177Srrs	ret = printf("%1.3f", res);
1103277177Srrs	return(ret);
1104277177Srrs
1105277177Srrs}
1106277177Srrs
1107277177Srrs
1108277177Srrsstatic int
1109277177Srrsfrontendstall(struct counters *cpu, int pos)
1110277177Srrs{
1111277177Srrs      /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1112277177Srrs	int ret;
1113277177Srrs	struct counters *idq;
1114277177Srrs	struct counters *unhalt;
1115277177Srrs	double con, un, id, res;
1116277177Srrs
1117277177Srrs	con = 4.0;
1118277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1119277177Srrs	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1120277177Srrs	if (pos != -1) {
1121277177Srrs		id = idq->vals[pos] * 1.0;
1122277177Srrs		un = unhalt->vals[pos] * 1.0;
1123277177Srrs	} else {
1124277177Srrs		id = idq->sum * 1.0;
1125277177Srrs		un = unhalt->sum * 1.0;
1126277177Srrs	}
1127277177Srrs	res = id/(un * con);
1128277177Srrs	ret = printf("%1.3f", res);
1129277177Srrs	return(ret);
1130277177Srrs}
1131277177Srrs
1132277177Srrsstatic int
1133277177Srrsclears(struct counters *cpu, int pos)
1134277177Srrs{
1135277177Srrs	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1136277177Srrs	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1137277177Srrs
1138277177Srrs	int ret;
1139277177Srrs	struct counters *clr1, *clr2, *clr3;
1140277177Srrs	struct counters *unhalt;
1141277177Srrs	double con, un, cl1, cl2, cl3, res;
1142277177Srrs
1143277177Srrs	con = 100.0;
1144277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1145277177Srrs	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1146277177Srrs	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1147277177Srrs	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1148277177Srrs
1149277177Srrs	if (pos != -1) {
1150277177Srrs		cl1 = clr1->vals[pos] * 1.0;
1151277177Srrs		cl2 = clr2->vals[pos] * 1.0;
1152277177Srrs		cl3 = clr3->vals[pos] * 1.0;
1153277177Srrs		un = unhalt->vals[pos] * 1.0;
1154277177Srrs	} else {
1155277177Srrs		cl1 = clr1->sum * 1.0;
1156277177Srrs		cl2 = clr2->sum * 1.0;
1157277177Srrs		cl3 = clr3->sum * 1.0;
1158277177Srrs		un = unhalt->sum * 1.0;
1159277177Srrs	}
1160277177Srrs	res = ((cl1 + cl2 + cl3) * con)/un;
1161277177Srrs	ret = printf("%1.3f", res);
1162277177Srrs	return(ret);
1163277177Srrs}
1164277177Srrs
1165277177Srrsstatic int
1166277177Srrsmicroassist(struct counters *cpu, int pos)
1167277177Srrs{
1168277177Srrs	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1169277177Srrs	int ret;
1170277177Srrs	struct counters *idq;
1171277177Srrs	struct counters *unhalt;
1172277177Srrs	double un, id, res, con;
1173277177Srrs
1174277177Srrs	con = 4.0;
1175277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1176277177Srrs	idq = find_counter(cpu, "IDQ.MS_UOPS");
1177277177Srrs	if (pos != -1) {
1178277177Srrs		id = idq->vals[pos] * 1.0;
1179277177Srrs		un = unhalt->vals[pos] * 1.0;
1180277177Srrs	} else {
1181277177Srrs		id = idq->sum * 1.0;
1182277177Srrs		un = unhalt->sum * 1.0;
1183277177Srrs	}
1184277177Srrs	res = id/(un * con);
1185277177Srrs	ret = printf("%1.3f", res);
1186277177Srrs	return(ret);
1187277177Srrs}
1188277177Srrs
1189277177Srrs
1190277177Srrsstatic int
1191277177Srrsaliasing(struct counters *cpu, int pos)
1192277177Srrs{
1193277177Srrs	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1194277177Srrs	int ret;
1195277177Srrs	struct counters *ld;
1196277177Srrs	struct counters *unhalt;
1197277177Srrs	double un, lds, con, res;
1198277177Srrs
1199277177Srrs	con = 5.0;
1200277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1201277177Srrs	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1202277177Srrs	if (pos != -1) {
1203277177Srrs		lds = ld->vals[pos] * 1.0;
1204277177Srrs		un = unhalt->vals[pos] * 1.0;
1205277177Srrs	} else {
1206277177Srrs		lds = ld->sum * 1.0;
1207277177Srrs		un = unhalt->sum * 1.0;
1208277177Srrs	}
1209277177Srrs	res = (lds * con)/un;
1210277177Srrs	ret = printf("%1.3f", res);
1211277177Srrs	return(ret);
1212277177Srrs}
1213277177Srrs
1214277177Srrsstatic int
1215277177Srrsfpassists(struct counters *cpu, int pos)
1216277177Srrs{
1217277177Srrs	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1218277177Srrs	int ret;
1219277177Srrs	struct counters *fp;
1220277177Srrs	struct counters *inst;
1221277177Srrs	double un, fpd, res;
1222277177Srrs
1223277177Srrs	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1224277177Srrs	fp = find_counter(cpu, "FP_ASSIST.ANY");
1225277177Srrs	if (pos != -1) {
1226277177Srrs		fpd = fp->vals[pos] * 1.0;
1227277177Srrs		un = inst->vals[pos] * 1.0;
1228277177Srrs	} else {
1229277177Srrs		fpd = fp->sum * 1.0;
1230277177Srrs		un = inst->sum * 1.0;
1231277177Srrs	}
1232277177Srrs	res = fpd/un;
1233277177Srrs	ret = printf("%1.3f", res);
1234277177Srrs	return(ret);
1235277177Srrs}
1236277177Srrs
1237277177Srrsstatic int
1238277177Srrsotherassistavx(struct counters *cpu, int pos)
1239277177Srrs{
1240277177Srrs	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1241277177Srrs	int ret;
1242277177Srrs	struct counters *oth;
1243277177Srrs	struct counters *unhalt;
1244277177Srrs	double un, ot, con, res;
1245277177Srrs
1246277177Srrs	con = 75.0;
1247277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248277177Srrs	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1249277177Srrs	if (pos != -1) {
1250277177Srrs		ot = oth->vals[pos] * 1.0;
1251277177Srrs		un = unhalt->vals[pos] * 1.0;
1252277177Srrs	} else {
1253277177Srrs		ot = oth->sum * 1.0;
1254277177Srrs		un = unhalt->sum * 1.0;
1255277177Srrs	}
1256277177Srrs	res = (ot * con)/un;
1257277177Srrs	ret = printf("%1.3f", res);
1258277177Srrs	return(ret);
1259277177Srrs}
1260277177Srrs
1261277177Srrsstatic int
1262277177Srrsotherassistsse(struct counters *cpu, int pos)
1263277177Srrs{
1264277177Srrs
1265277177Srrs	int ret;
1266277177Srrs	struct counters *oth;
1267277177Srrs	struct counters *unhalt;
1268277177Srrs	double un, ot, con, res;
1269277177Srrs
1270277177Srrs	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1271277177Srrs	con = 75.0;
1272277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273277177Srrs	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1274277177Srrs	if (pos != -1) {
1275277177Srrs		ot = oth->vals[pos] * 1.0;
1276277177Srrs		un = unhalt->vals[pos] * 1.0;
1277277177Srrs	} else {
1278277177Srrs		ot = oth->sum * 1.0;
1279277177Srrs		un = unhalt->sum * 1.0;
1280277177Srrs	}
1281277177Srrs	res = (ot * con)/un;
1282277177Srrs	ret = printf("%1.3f", res);
1283277177Srrs	return(ret);
1284277177Srrs}
1285277177Srrs
1286277177Srrsstatic int
1287277177Srrsefficiency1(struct counters *cpu, int pos)
1288277177Srrs{
1289277177Srrs
1290277177Srrs	int ret;
1291277177Srrs	struct counters *uops;
1292277177Srrs	struct counters *unhalt;
1293277177Srrs	double un, ot, con, res;
1294277177Srrs
1295277177Srrs        /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1296277177Srrs	con = 4.0;
1297277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298277177Srrs	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1299277177Srrs	if (pos != -1) {
1300277177Srrs		ot = uops->vals[pos] * 1.0;
1301277177Srrs		un = unhalt->vals[pos] * 1.0;
1302277177Srrs	} else {
1303277177Srrs		ot = uops->sum * 1.0;
1304277177Srrs		un = unhalt->sum * 1.0;
1305277177Srrs	}
1306277177Srrs	res = ot/(con * un);
1307277177Srrs	ret = printf("%1.3f", res);
1308277177Srrs	return(ret);
1309277177Srrs}
1310277177Srrs
1311277177Srrsstatic int
1312277177Srrsefficiency2(struct counters *cpu, int pos)
1313277177Srrs{
1314277177Srrs
1315277177Srrs	int ret;
1316277177Srrs	struct counters *uops;
1317277177Srrs	struct counters *unhalt;
1318277177Srrs	double un, ot, res;
1319277177Srrs
1320277177Srrs        /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1321277177Srrs	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1322277177Srrs	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1323277177Srrs	if (pos != -1) {
1324277177Srrs		ot = uops->vals[pos] * 1.0;
1325277177Srrs		un = unhalt->vals[pos] * 1.0;
1326277177Srrs	} else {
1327277177Srrs		ot = uops->sum * 1.0;
1328277177Srrs		un = unhalt->sum * 1.0;
1329277177Srrs	}
1330277177Srrs	res = un/ot;
1331277177Srrs	ret = printf("%1.3f", res);
1332277177Srrs	return(ret);
1333277177Srrs}
1334277177Srrs
1335277177Srrs#define SANDY_BRIDGE_COUNT 20
1336277177Srrsstatic struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1337277177Srrs/*01*/	{ "allocstall1", "thresh > .05",
1338277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1339277177Srrs	  allocstall1 },
1340277177Srrs/*02*/	{ "allocstall2", "thresh > .05",
1341277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
1342277177Srrs	  allocstall2 },
1343277177Srrs/*03*/	{ "br_miss", "thresh >= .2",
1344277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1345277177Srrs	  br_mispredict },
1346277177Srrs/*04*/	{ "splitload", "thresh >= .1",
1347277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1348277177Srrs	  splitload },
1349277177Srrs/*05*/	{ "splitstore", "thresh >= .01",
1350277177Srrs	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1351277177Srrs	  splitstore },
1352277177Srrs/*06*/	{ "contested", "thresh >= .05",
1353277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1354277177Srrs	  contested },
1355277177Srrs/*07*/	{ "blockstorefwd", "thresh >= .05",
1356277177Srrs	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1357277177Srrs	  blockstoreforward },
1358277177Srrs/*08*/	{ "cache2", "thresh >= .2",
1359277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1360277177Srrs	  cache2 },
1361277177Srrs/*09*/	{ "cache1", "thresh >= .2",
1362277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1363277177Srrs	  cache1 },
1364277177Srrs/*10*/	{ "dtlbmissload", "thresh >= .1",
1365277177Srrs	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1366277177Srrs	  dtlb_missload },
1367277177Srrs/*11*/	{ "dtlbmissstore", "thresh >= .05",
1368277177Srrs	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1369277177Srrs	  dtlb_missstore },
1370277177Srrs/*12*/	{ "frontendstall", "thresh >= .15",
1371277177Srrs	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1372277177Srrs	  frontendstall },
1373277177Srrs/*13*/	{ "clears", "thresh >= .02",
1374277177Srrs	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1375277177Srrs	  clears },
1376277177Srrs/*14*/	{ "microassist", "thresh >= .05",
1377277177Srrs	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1378277177Srrs	  microassist },
1379277177Srrs/*15*/	{ "aliasing_4k", "thresh >= .1",
1380277177Srrs	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1381277177Srrs	  aliasing },
1382277177Srrs/*16*/	{ "fpassist", "look for a excessive value",
1383277177Srrs	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1384277177Srrs	  fpassists },
1385277177Srrs/*17*/	{ "otherassistavx", "look for a excessive value",
1386277177Srrs	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1387277177Srrs	  otherassistavx },
1388277177Srrs/*18*/	{ "otherassistsse", "look for a excessive value",
1389277177Srrs	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1390277177Srrs	  otherassistsse },
1391277177Srrs/*19*/	{ "eff1", "thresh < .9",
1392277177Srrs	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1393277177Srrs	  efficiency1 },
1394277177Srrs/*20*/	{ "eff2", "thresh > 1.0",
1395277177Srrs	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1396277177Srrs	  efficiency2 },
1397277177Srrs};
1398277177Srrs
1399277177Srrs
1400277177Srrs#define IVY_BRIDGE_COUNT 21
1401277177Srrsstatic struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1402277177Srrs/*1*/	{ "eff1", "thresh < .75",
1403277177Srrs	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1404277177Srrs	  efficiency1 },
1405277177Srrs/*2*/	{ "eff2", "thresh > 1.0",
1406277177Srrs	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1407277177Srrs	  efficiency2 },
1408277177Srrs/*3*/	{ "itlbmiss", "thresh > .05",
1409277177Srrs	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1410277177Srrs	  itlb_miss },
1411277177Srrs/*4*/	{ "icachemiss", "thresh > .05",
1412277177Srrs	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1413277177Srrs	  icache_miss },
1414277177Srrs/*5*/	{ "lcpstall", "thresh > .05",
1415277177Srrs	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1416277177Srrs	  lcp_stall },
1417277177Srrs/*6*/	{ "cache1", "thresh >= .2",
1418277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1419277177Srrs	  cache1ib },
1420277177Srrs/*7*/	{ "cache2", "thresh >= .2",
1421277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1422277177Srrs	  cache2ib },
1423277177Srrs/*8*/	{ "contested", "thresh >= .05",
1424277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1425277177Srrs	  contested },
1426277177Srrs/*9*/	{ "datashare", "thresh >= .05",
1427277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1428277177Srrs	  datasharing },
1429277177Srrs/*10*/	{ "blockstorefwd", "thresh >= .05",
1430277177Srrs	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1431277177Srrs	  blockstoreforward },
1432277177Srrs/*11*/	{ "splitload", "thresh >= .1",
1433277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1434277177Srrs	  splitloadib },
1435277177Srrs/*12*/	{ "splitstore", "thresh >= .01",
1436277177Srrs	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1437277177Srrs	  splitstore },
1438277177Srrs/*13*/	{ "aliasing_4k", "thresh >= .1",
1439277177Srrs	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1440277177Srrs	  aliasing },
1441277177Srrs/*14*/	{ "dtlbmissload", "thresh >= .1",
1442277177Srrs	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1443277177Srrs	  dtlb_missload },
1444277177Srrs/*15*/	{ "dtlbmissstore", "thresh >= .05",
1445277177Srrs	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1446277177Srrs	  dtlb_missstore },
1447277177Srrs/*16*/	{ "br_miss", "thresh >= .2",
1448277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1449277177Srrs	  br_mispredictib },
1450277177Srrs/*17*/	{ "clears", "thresh >= .02",
1451277177Srrs	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1452277177Srrs	  clears },
1453277177Srrs/*18*/	{ "microassist", "thresh >= .05",
1454277177Srrs	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1455277177Srrs	  microassist },
1456277177Srrs/*19*/	{ "fpassist", "look for a excessive value",
1457277177Srrs	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1458277177Srrs	  fpassists },
1459277177Srrs/*20*/	{ "otherassistavx", "look for a excessive value",
1460277177Srrs	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1461277177Srrs	  otherassistavx },
1462277177Srrs/*21*/	{ "otherassistsse", "look for a excessive value",
1463277177Srrs	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1464277177Srrs	  otherassistsse },
1465277177Srrs};
1466277177Srrs
1467277177Srrs#define HASWELL_COUNT 20
1468277177Srrsstatic struct cpu_entry haswell[HASWELL_COUNT] = {
1469277177Srrs/*1*/	{ "eff1", "thresh < .75",
1470277177Srrs	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1471277177Srrs	  efficiency1 },
1472277177Srrs/*2*/	{ "eff2", "thresh > 1.0",
1473277177Srrs	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1474277177Srrs	  efficiency2 },
1475277177Srrs/*3*/	{ "itlbmiss", "thresh > .05",
1476277177Srrs	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1477277177Srrs	  itlb_miss },
1478277177Srrs/*4*/	{ "icachemiss", "thresh > .05",
1479277177Srrs	  "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
1480277177Srrs	  icache_miss_has },
1481277177Srrs/*5*/	{ "lcpstall", "thresh > .05",
1482277177Srrs	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1483277177Srrs	  lcp_stall },
1484277177Srrs/*6*/	{ "cache1", "thresh >= .2",
1485277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1486277177Srrs	  cache1ib },
1487277177Srrs/*7*/	{ "cache2", "thresh >= .2",
1488277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1489277177Srrs	  cache2has },
1490277177Srrs/*8*/	{ "contested", "thresh >= .05",
1491277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1492277177Srrs	  contested_has },
1493277177Srrs/*9*/	{ "datashare", "thresh >= .05",
1494277177Srrs	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1495277177Srrs	  datasharing_has },
1496277177Srrs/*10*/	{ "blockstorefwd", "thresh >= .05",
1497277177Srrs	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1498277177Srrs	  blockstoreforward },
1499277177Srrs/*11*/	{ "splitload", "thresh >= .1",
1500277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1501277177Srrs	  splitload },
1502277177Srrs/*12*/	{ "splitstore", "thresh >= .01",
1503277177Srrs	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1504277177Srrs	  splitstore },
1505277177Srrs/*13*/	{ "aliasing_4k", "thresh >= .1",
1506277177Srrs	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1507277177Srrs	  aliasing },
1508277177Srrs/*14*/	{ "dtlbmissload", "thresh >= .1",
1509277177Srrs	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1510277177Srrs	  dtlb_missload },
1511277177Srrs/*15*/	{ "br_miss", "thresh >= .2",
1512277177Srrs	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1513277177Srrs	  br_mispredict },
1514277177Srrs/*16*/	{ "clears", "thresh >= .02",
1515277177Srrs	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1516277177Srrs	  clears },
1517277177Srrs/*17*/	{ "microassist", "thresh >= .05",
1518277177Srrs	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1519277177Srrs	  microassist },
1520277177Srrs/*18*/	{ "fpassist", "look for a excessive value",
1521277177Srrs	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1522277177Srrs	  fpassists },
1523277177Srrs/*19*/	{ "otherassistavx", "look for a excessive value",
1524277177Srrs	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1525277177Srrs	  otherassistavx },
1526277177Srrs/*20*/	{ "otherassistsse", "look for a excessive value",
1527277177Srrs	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1528277177Srrs	  otherassistsse },
1529277177Srrs};
1530277177Srrs
1531277177Srrs
1532277177Srrsstatic void
1533277177Srrsset_sandybridge(void)
1534277177Srrs{
1535277177Srrs	strcpy(the_cpu.cputype, "SandyBridge PMC");
1536277177Srrs	the_cpu.number = SANDY_BRIDGE_COUNT;
1537277177Srrs	the_cpu.ents = sandy_bridge;
1538277177Srrs	the_cpu.explain = explain_name_sb;
1539277177Srrs}
1540277177Srrs
1541277177Srrsstatic void
1542277177Srrsset_ivybridge(void)
1543277177Srrs{
1544277177Srrs	strcpy(the_cpu.cputype, "IvyBridge PMC");
1545277177Srrs	the_cpu.number = IVY_BRIDGE_COUNT;
1546277177Srrs	the_cpu.ents = ivy_bridge;
1547277177Srrs	the_cpu.explain = explain_name_ib;
1548277177Srrs}
1549277177Srrs
1550277177Srrs
1551277177Srrsstatic void
1552277177Srrsset_haswell(void)
1553277177Srrs{
1554277177Srrs	strcpy(the_cpu.cputype, "HASWELL PMC");
1555277177Srrs	the_cpu.number = HASWELL_COUNT;
1556277177Srrs	the_cpu.ents = haswell;
1557277177Srrs	the_cpu.explain = explain_name_has;
1558277177Srrs}
1559277177Srrs
1560277177Srrsstatic void
1561277177Srrsset_expression(char *name)
1562277177Srrs{
1563277177Srrs	int found = 0, i;
1564277177Srrs	for(i=0 ; i< the_cpu.number; i++) {
1565277177Srrs		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1566277177Srrs			found = 1;
1567277177Srrs			expression = the_cpu.ents[i].func;
1568277177Srrs			command = the_cpu.ents[i].command;
1569277177Srrs			threshold = the_cpu.ents[i].thresh;
1570277177Srrs			break;
1571277177Srrs		}
1572277177Srrs	}
1573277177Srrs	if (!found) {
1574277177Srrs		printf("For CPU type %s we have no expression:%s\n",
1575277177Srrs		       the_cpu.cputype, name);
1576277177Srrs		exit(-1);
1577277177Srrs	}
1578277177Srrs}
1579277177Srrs
1580277177Srrs
1581277177Srrs
1582277177Srrs
1583277177Srrs
1584277177Srrsstatic int
1585277177Srrsvalidate_expression(char *name)
1586277177Srrs{
1587277177Srrs	int i, found;
1588277177Srrs
1589277177Srrs	found = 0;
1590277177Srrs	for(i=0 ; i< the_cpu.number; i++) {
1591277177Srrs		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1592277177Srrs			found = 1;
1593277177Srrs			break;
1594277177Srrs		}
1595277177Srrs	}
1596277177Srrs	if (!found) {
1597277177Srrs		return(-1);
1598277177Srrs	}
1599277177Srrs	return (0);
1600277177Srrs}
1601277177Srrs
1602277177Srrsstatic void
1603277177Srrsdo_expression(struct counters *cpu, int pos)
1604277177Srrs{
1605277177Srrs	if (expression == NULL)
1606277177Srrs		return;
1607277177Srrs	(*expression)(cpu, pos);
1608277177Srrs}
1609277177Srrs
1610277177Srrsstatic void
1611277177Srrsprocess_header(int idx, char *p)
1612277177Srrs{
1613277177Srrs	struct counters *up;
1614277177Srrs	int i, len, nlen;
1615277177Srrs	/*
1616277177Srrs	 * Given header element idx, at p in
1617277177Srrs	 * form 's/NN/nameof'
1618277177Srrs	 * process the entry to pull out the name and
1619277177Srrs	 * the CPU number.
1620277177Srrs	 */
1621277177Srrs	if (strncmp(p, "s/", 2)) {
1622277177Srrs		printf("Check -- invalid header no s/ in %s\n",
1623277177Srrs		       p);
1624277177Srrs		return;
1625277177Srrs	}
1626277177Srrs	up = &cnts[idx];
1627277177Srrs	up->cpu = strtol(&p[2], NULL, 10);
1628277177Srrs	len = strlen(p);
1629277177Srrs	for (i=2; i<len; i++) {
1630277177Srrs		if (p[i] == '/') {
1631277177Srrs			nlen = strlen(&p[(i+1)]);
1632277177Srrs			if (nlen < (MAX_NLEN-1)) {
1633277177Srrs				strcpy(up->counter_name, &p[(i+1)]);
1634277177Srrs			} else {
1635277177Srrs				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
1636277177Srrs			}
1637277177Srrs		}
1638277177Srrs	}
1639277177Srrs}
1640277177Srrs
1641277177Srrsstatic void
1642277177Srrsbuild_counters_from_header(FILE *io)
1643277177Srrs{
1644277177Srrs	char buffer[8192], *p;
1645277177Srrs	int i, len, cnt;
1646277177Srrs	size_t mlen;
1647277177Srrs
1648277177Srrs	/* We have a new start, lets
1649277177Srrs	 * setup our headers and cpus.
1650277177Srrs	 */
1651277177Srrs	if (fgets(buffer, sizeof(buffer), io) == NULL) {
1652277177Srrs		printf("First line can't be read from file err:%d\n", errno);
1653277177Srrs		return;
1654277177Srrs	}
1655277177Srrs	/*
1656277177Srrs	 * Ok output is an array of counters. Once
1657277177Srrs	 * we start to read the values in we must
1658277177Srrs	 * put them in there slot to match there CPU and
1659277177Srrs	 * counter being updated. We create a mass array
1660277177Srrs	 * of the counters, filling in the CPU and
1661277177Srrs	 * counter name.
1662277177Srrs	 */
1663277177Srrs	/* How many do we get? */
1664277177Srrs	len = strlen(buffer);
1665277177Srrs	for (i=0, cnt=0; i<len; i++) {
1666277177Srrs		if (strncmp(&buffer[i], "s/", 2) == 0) {
1667277177Srrs			cnt++;
1668277177Srrs			for(;i<len;i++) {
1669277177Srrs				if (buffer[i] == ' ')
1670277177Srrs					break;
1671277177Srrs			}
1672277177Srrs		}
1673277177Srrs	}
1674277177Srrs	mlen = sizeof(struct counters) * cnt;
1675277177Srrs	cnts = malloc(mlen);
1676277177Srrs	ncnts = cnt;
1677277177Srrs	if (cnts == NULL) {
1678277177Srrs		printf("No memory err:%d\n", errno);
1679277177Srrs		return;
1680277177Srrs	}
1681277177Srrs	memset(cnts, 0, mlen);
1682277177Srrs	for (i=0, cnt=0; i<len; i++) {
1683277177Srrs		if (strncmp(&buffer[i], "s/", 2) == 0) {
1684277177Srrs			p = &buffer[i];
1685277177Srrs			for(;i<len;i++) {
1686277177Srrs				if (buffer[i] == ' ') {
1687277177Srrs					buffer[i] = 0;
1688277177Srrs					break;
1689277177Srrs				}
1690277177Srrs			}
1691277177Srrs			process_header(cnt, p);
1692277177Srrs			cnt++;
1693277177Srrs		}
1694277177Srrs	}
1695277177Srrs	if (verbose)
1696277177Srrs		printf("We have %d entries\n", cnt);
1697277177Srrs}
1698277177Srrsextern int max_to_collect;
1699277177Srrsint max_to_collect = MAX_COUNTER_SLOTS;
1700277177Srrs
1701277177Srrsstatic int
1702277177Srrsread_a_line(FILE *io)
1703277177Srrs{
1704277177Srrs	char buffer[8192], *p, *stop;
1705277177Srrs	int pos, i;
1706277177Srrs
1707277177Srrs	if (fgets(buffer, sizeof(buffer), io) == NULL) {
1708277177Srrs		return(0);
1709277177Srrs	}
1710277177Srrs	p = buffer;
1711277177Srrs	for (i=0; i<ncnts; i++) {
1712277177Srrs		pos = cnts[i].pos;
1713277177Srrs		cnts[i].vals[pos] = strtol(p, &stop, 0);
1714277177Srrs		cnts[i].pos++;
1715277177Srrs		cnts[i].sum += cnts[i].vals[pos];
1716277177Srrs		p = stop;
1717277177Srrs	}
1718277177Srrs	return (1);
1719277177Srrs}
1720277177Srrs
1721277177Srrsextern int cpu_count_out;
1722277177Srrsint cpu_count_out=0;
1723277177Srrs
1724277177Srrsstatic void
1725277177Srrsprint_header(void)
1726277177Srrs{
1727277177Srrs	int i, cnt, printed_cnt;
1728277177Srrs
1729277177Srrs	printf("*********************************\n");
1730277177Srrs	for(i=0, cnt=0; i<MAX_CPU; i++) {
1731277177Srrs		if (glob_cpu[i]) {
1732277177Srrs			cnt++;
1733277177Srrs		}
1734277177Srrs	}
1735277177Srrs	cpu_count_out = cnt;
1736277177Srrs	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
1737277177Srrs		if (glob_cpu[i]) {
1738277177Srrs			printf("CPU%d", i);
1739277177Srrs			printed_cnt++;
1740277177Srrs		}
1741277177Srrs		if (printed_cnt == cnt) {
1742277177Srrs			printf("\n");
1743277177Srrs			break;
1744277177Srrs		} else {
1745277177Srrs			printf("\t");
1746277177Srrs		}
1747277177Srrs	}
1748277177Srrs}
1749277177Srrs
1750277177Srrsstatic void
1751277177Srrslace_cpus_together(void)
1752277177Srrs{
1753277177Srrs	int i, j, lace_cpu;
1754277177Srrs	struct counters *cpat, *at;
1755277177Srrs
1756277177Srrs	for(i=0; i<ncnts; i++) {
1757277177Srrs		cpat = &cnts[i];
1758277177Srrs		if (cpat->next_cpu) {
1759277177Srrs			/* Already laced in */
1760277177Srrs			continue;
1761277177Srrs		}
1762277177Srrs		lace_cpu = cpat->cpu;
1763277177Srrs		if (lace_cpu >= MAX_CPU) {
1764277177Srrs			printf("CPU %d to big\n", lace_cpu);
1765277177Srrs			continue;
1766277177Srrs		}
1767277177Srrs		if (glob_cpu[lace_cpu] == NULL) {
1768277177Srrs			glob_cpu[lace_cpu] = cpat;
1769277177Srrs		} else {
1770277177Srrs			/* Already processed this cpu */
1771277177Srrs			continue;
1772277177Srrs		}
1773277177Srrs		/* Ok look forward for cpu->cpu and link in */
1774277177Srrs		for(j=(i+1); j<ncnts; j++) {
1775277177Srrs			at = &cnts[j];
1776277177Srrs			if (at->next_cpu) {
1777277177Srrs				continue;
1778277177Srrs			}
1779277177Srrs			if (at->cpu == lace_cpu) {
1780277177Srrs				/* Found one */
1781277177Srrs				cpat->next_cpu = at;
1782277177Srrs				cpat = at;
1783277177Srrs			}
1784277177Srrs		}
1785277177Srrs	}
1786277177Srrs}
1787277177Srrs
1788277177Srrs
1789277177Srrsstatic void
1790277177Srrsprocess_file(char *filename)
1791277177Srrs{
1792277177Srrs	FILE *io;
1793277177Srrs	int i;
1794277177Srrs	int line_at, not_done;
1795277177Srrs	pid_t pid_of_command=0;
1796277177Srrs
1797277177Srrs	if (filename ==  NULL) {
1798277177Srrs		io = my_popen(command, "r", &pid_of_command);
1799277177Srrs	} else {
1800277177Srrs		io = fopen(filename, "r");
1801277177Srrs		if (io == NULL) {
1802277177Srrs			printf("Can't process file %s err:%d\n",
1803277177Srrs			       filename, errno);
1804277177Srrs			return;
1805277177Srrs		}
1806277177Srrs	}
1807277177Srrs	build_counters_from_header(io);
1808277177Srrs	if (cnts == NULL) {
1809277177Srrs		/* Nothing we can do */
1810277177Srrs		printf("Nothing to do -- no counters built\n");
1811285853Semaste		if (io) {
1812285853Semaste			fclose(io);
1813285853Semaste		}
1814277177Srrs		return;
1815277177Srrs	}
1816277177Srrs	lace_cpus_together();
1817277177Srrs	print_header();
1818277177Srrs	if (verbose) {
1819277177Srrs		for (i=0; i<ncnts; i++) {
1820277177Srrs			printf("Counter:%s cpu:%d index:%d\n",
1821277177Srrs			       cnts[i].counter_name,
1822277177Srrs			       cnts[i].cpu, i);
1823277177Srrs		}
1824277177Srrs	}
1825277177Srrs	line_at = 0;
1826277177Srrs	not_done = 1;
1827277177Srrs	while(not_done) {
1828277177Srrs		if (read_a_line(io)) {
1829277177Srrs			line_at++;
1830277177Srrs		} else {
1831277177Srrs			break;
1832277177Srrs		}
1833277177Srrs		if (line_at >= max_to_collect) {
1834277177Srrs			not_done = 0;
1835277177Srrs		}
1836277177Srrs		if (filename == NULL) {
1837277177Srrs			int cnt;
1838277177Srrs			/* For the ones we dynamically open we print now */
1839277177Srrs			for(i=0, cnt=0; i<MAX_CPU; i++) {
1840277177Srrs				do_expression(glob_cpu[i], (line_at-1));
1841277177Srrs				cnt++;
1842277177Srrs				if (cnt == cpu_count_out) {
1843277177Srrs					printf("\n");
1844277177Srrs					break;
1845277177Srrs				} else {
1846277177Srrs					printf("\t");
1847277177Srrs				}
1848277177Srrs			}
1849277177Srrs		}
1850277177Srrs	}
1851277177Srrs	if (filename) {
1852277177Srrs		fclose(io);
1853277177Srrs	} else {
1854277177Srrs		my_pclose(io, pid_of_command);
1855277177Srrs	}
1856277177Srrs}
/*
 * cpuid(in, a, b, c, d): execute the CPUID instruction with `in' in
 * EAX and store the resulting EAX, EBX, ECX and EDX in a, b, c and d.
 * On anything but amd64 the macro expands to nothing, so the outputs
 * keep whatever value they had.  The amd64 expansion carries its own
 * trailing semicolon.
 *
 * __asm__ is used instead of asm so the macro also compiles when the
 * compiler runs in a strict ISO C mode.
 */
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  __asm__("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
#else
#define cpuid(in, a, b, c, d)
#endif
1863277177Srrs
1864277177Srrsstatic void
1865277177Srrsget_cpuid_set(void)
1866277177Srrs{
1867277177Srrs	unsigned long eax, ebx, ecx, edx;
1868277177Srrs	int model;
1869277177Srrs	pid_t pid_of_command=0;
1870277177Srrs	size_t sz, len;
1871277177Srrs	FILE *io;
1872277177Srrs	char linebuf[1024], *str;
1873277177Srrs
1874277177Srrs	eax = ebx = ecx = edx = 0;
1875277177Srrs
1876277177Srrs	cpuid(0, eax, ebx, ecx, edx);
1877277177Srrs	if (ebx == 0x68747541) {
1878277177Srrs		printf("AMD processors are not supported by this program\n");
1879277177Srrs		printf("Sorry\n");
1880277177Srrs		exit(0);
1881277177Srrs	} else if (ebx == 0x6972794) {
1882277177Srrs		printf("Cyrix processors are not supported by this program\n");
1883277177Srrs		printf("Sorry\n");
1884277177Srrs		exit(0);
1885277177Srrs	} else if (ebx == 0x756e6547) {
1886277177Srrs		printf("Genuine Intel\n");
1887277177Srrs	} else {
1888277177Srrs		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
1889277177Srrs		exit(0);
1890277177Srrs	}
1891277177Srrs	cpuid(1, eax, ebx, ecx, edx);
1892277177Srrs	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
1893277177Srrs	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
1894277177Srrs	switch (eax & 0xF00) {
1895277177Srrs	case 0x500:		/* Pentium family processors */
1896277177Srrs		printf("Intel Pentium P5\n");
1897277177Srrs		goto not_supported;
1898277177Srrs		break;
1899277177Srrs	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
1900277177Srrs		switch (model) {
1901277177Srrs		case 0x1:
1902277177Srrs			printf("Intel Pentium P6\n");
1903277177Srrs			goto not_supported;
1904277177Srrs			break;
1905277177Srrs		case 0x3:
1906277177Srrs		case 0x5:
1907277177Srrs			printf("Intel PII\n");
1908277177Srrs			goto not_supported;
1909277177Srrs			break;
1910277177Srrs		case 0x6: case 0x16:
1911277177Srrs			printf("Intel CL\n");
1912277177Srrs			goto not_supported;
1913277177Srrs			break;
1914277177Srrs		case 0x7: case 0x8: case 0xA: case 0xB:
1915277177Srrs			printf("Intel PIII\n");
1916277177Srrs			goto not_supported;
1917277177Srrs			break;
1918277177Srrs		case 0x9: case 0xD:
1919277177Srrs			printf("Intel PM\n");
1920277177Srrs			goto not_supported;
1921277177Srrs			break;
1922277177Srrs		case 0xE:
1923277177Srrs			printf("Intel CORE\n");
1924277177Srrs			goto not_supported;
1925277177Srrs			break;
1926277177Srrs		case 0xF:
1927277177Srrs			printf("Intel CORE2\n");
1928277177Srrs			goto not_supported;
1929277177Srrs			break;
1930277177Srrs		case 0x17:
1931277177Srrs			printf("Intel CORE2EXTREME\n");
1932277177Srrs			goto not_supported;
1933277177Srrs			break;
1934277177Srrs		case 0x1C:	/* Per Intel document 320047-002. */
1935277177Srrs			printf("Intel ATOM\n");
1936277177Srrs			goto not_supported;
1937277177Srrs			break;
1938277177Srrs		case 0x1A:
1939277177Srrs		case 0x1E:	/*
1940277177Srrs				 * Per Intel document 253669-032 9/2009,
1941277177Srrs				 * pages A-2 and A-57
1942277177Srrs				 */
1943277177Srrs		case 0x1F:	/*
1944277177Srrs				 * Per Intel document 253669-032 9/2009,
1945277177Srrs				 * pages A-2 and A-57
1946277177Srrs				 */
1947277177Srrs			printf("Intel COREI7\n");
1948277177Srrs			goto not_supported;
1949277177Srrs			break;
1950277177Srrs		case 0x2E:
1951277177Srrs			printf("Intel NEHALEM\n");
1952277177Srrs			goto not_supported;
1953277177Srrs			break;
1954277177Srrs		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
1955277177Srrs		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
1956277177Srrs			printf("Intel WESTMERE\n");
1957277177Srrs			goto not_supported;
1958277177Srrs			break;
1959277177Srrs		case 0x2F:	/* Westmere-EX, seen in wild */
1960277177Srrs			printf("Intel WESTMERE\n");
1961277177Srrs			goto not_supported;
1962277177Srrs			break;
1963277177Srrs		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
1964277177Srrs			printf("Intel SANDYBRIDGE\n");
1965277177Srrs			set_sandybridge();
1966277177Srrs			break;
1967277177Srrs		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
1968277177Srrs			printf("Intel SANDYBRIDGE_XEON\n");
1969277177Srrs			set_sandybridge();
1970277177Srrs			break;
1971277177Srrs		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
1972277177Srrs			printf("Intel IVYBRIDGE\n");
1973277177Srrs			set_ivybridge();
1974277177Srrs			break;
1975277177Srrs		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
1976277177Srrs			printf("Intel IVYBRIDGE_XEON\n");
1977277177Srrs			set_ivybridge();
1978277177Srrs			break;
1979277177Srrs		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
1980277177Srrs			printf("Intel HASWELL (Xeon)\n");
1981277177Srrs			set_haswell();
1982277177Srrs			break;
1983277177Srrs		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
1984277177Srrs		case 0x45:
1985277177Srrs		case 0x46:
1986277177Srrs			printf("Intel HASWELL\n");
1987277177Srrs			set_haswell();
1988277177Srrs			break;
1989277177Srrs		case 0x4D:
1990277177Srrs			/* Per Intel document 330061-001 01/2014. */
1991277177Srrs			printf("Intel ATOM_SILVERMONT\n");
1992277177Srrs			goto not_supported;
1993277177Srrs			break;
1994277177Srrs		default:
1995277177Srrs			printf("Intel model 0x%x is not known -- sorry\n",
1996277177Srrs			       model);
1997277177Srrs			goto not_supported;
1998277177Srrs			break;
1999277177Srrs		}
2000277177Srrs		break;
2001277177Srrs	case 0xF00:		/* P4 */
2002277177Srrs		printf("Intel unknown model %d\n", model);
2003277177Srrs		goto not_supported;
2004277177Srrs		break;
2005277177Srrs	}
2006277177Srrs	/* Ok lets load the list of all known PMC's */
2007277177Srrs	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2008277177Srrs	if (valid_pmcs == NULL) {
2009277177Srrs		/* Likely */
2010277177Srrs		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2011277177Srrs		sz = sizeof(char *) * pmc_allocated_cnt;
2012277177Srrs		valid_pmcs = malloc(sz);
2013277177Srrs		if (valid_pmcs == NULL) {
2014277177Srrs			printf("No memory allocation fails at startup?\n");
2015277177Srrs			exit(-1);
2016277177Srrs		}
2017277177Srrs		memset(valid_pmcs, 0, sz);
2018277177Srrs	}
2019277177Srrs
2020277177Srrs	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2021277177Srrs		if (linebuf[0] != '\t') {
2022277177Srrs			/* sometimes headers ;-) */
2023277177Srrs			continue;
2024277177Srrs		}
2025277177Srrs		len = strlen(linebuf);
2026277177Srrs		if (linebuf[(len-1)] == '\n') {
2027277177Srrs			/* Likely */
2028277177Srrs			linebuf[(len-1)] = 0;
2029277177Srrs		}
2030277177Srrs		str = &linebuf[1];
2031277177Srrs		len = strlen(str) + 1;
2032277177Srrs		valid_pmcs[valid_pmc_cnt] = malloc(len);
2033277177Srrs		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2034277177Srrs			printf("No memory2 allocation fails at startup?\n");
2035277177Srrs			exit(-1);
2036277177Srrs		}
2037277177Srrs		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2038277177Srrs		strcpy(valid_pmcs[valid_pmc_cnt], str);
2039277177Srrs		valid_pmc_cnt++;
2040277177Srrs		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2041277177Srrs			/* Got to expand -- unlikely */
2042277177Srrs			char **more;
2043277177Srrs
2044277177Srrs			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2045277177Srrs			more = malloc(sz);
2046277177Srrs			if (more == NULL) {
2047277177Srrs				printf("No memory3 allocation fails at startup?\n");
2048277177Srrs				exit(-1);
2049277177Srrs			}
2050285853Semaste			memset(more, 0, sz);
2051277177Srrs			memcpy(more, valid_pmcs, sz);
2052277177Srrs			pmc_allocated_cnt *= 2;
2053277177Srrs			free(valid_pmcs);
2054277177Srrs			valid_pmcs = more;
2055277177Srrs		}
2056277177Srrs	}
2057277177Srrs	my_pclose(io, pid_of_command);
2058277177Srrs	return;
2059277177Srrsnot_supported:
2060277177Srrs	printf("Not supported\n");
2061277177Srrs	exit(-1);
2062277177Srrs}
2063277177Srrs
2064277177Srrsstatic void
2065277177Srrsexplain_all(void)
2066277177Srrs{
2067277177Srrs	int i;
2068277177Srrs	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2069277177Srrs	printf("-------------------------------------------------------------\n");
2070277177Srrs	for(i=0; i<the_cpu.number; i++){
2071277177Srrs		printf("For -e %s ", the_cpu.ents[i].name);
2072277177Srrs		(*the_cpu.explain)(the_cpu.ents[i].name);
2073277177Srrs		printf("----------------------------\n");
2074277177Srrs	}
2075277177Srrs}
2076277177Srrs
/*
 * Try out a single PMC and print "Pass" plus the first sample, or a
 * "Failed" message.
 *
 * `pmc' is the counter name and `out_so_far' the number of characters
 * the caller already printed on the current line; the line is padded
 * with blanks up to column 50 before the verdict.  The counter is run
 * through "/usr/sbin/pmcstat -w .25 -c 0 -s <pmc>".  The first output
 * line must mention the PMC name (and must not contain "ERROR" ahead
 * of it); the second line is taken as the sample and printed right
 * aligned in a 20 character field.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	char resp[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		/* Would not fit, don't run a truncated command */
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect */
	len = snprintf(resp, sizeof(resp), "%s", pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	/* Start at 2, i.e. past the first two characters of the line */
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], resp, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip the leading blanks one at a time; the
			 * terminator ends the scan on an all-blank line.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, resp);
out:
	my_pclose(io, pid_of_command);

}
2149277177Srrs
/*
 * Append a private copy of `name' to the string table `vars', which
 * currently holds `cur_cnt' entries, unless the name is already there.
 *
 * Returns 1 when the name was stored in vars[cur_cnt] and 0 when it
 * was already present.  Exits with -1 when the slot at cur_cnt is
 * unexpectedly occupied or when memory runs out.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	size_t bytes;
	char **slot;
	int n;

	n = 0;
	while (n < cur_cnt) {
		if (strcmp(vars[n], name) == 0) {
			/* Seen before */
			return (0);
		}
		n++;
	}
	slot = &vars[cur_cnt];
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, *slot);
		exit(-1);
	}
	/* New name, keep our own copy (terminator included) */
	bytes = strlen(name) + 1;
	*slot = malloc(bytes);
	if (*slot == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(*slot, name, bytes);
	return (1);
}
2177277177Srrs
2178277177Srrsstatic char *
2179277177Srrsbuild_command_for_exp(struct expression *exp)
2180277177Srrs{
2181277177Srrs	/*
2182277177Srrs	 * Build the pmcstat command to handle
2183277177Srrs	 * the passed in expression.
2184277177Srrs	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2185277177Srrs	 * where NNN and QQQ represent the PMC's in the expression
2186277177Srrs	 * uniquely..
2187277177Srrs	 */
2188277177Srrs	char forming[1024];
2189277177Srrs	int cnt_pmc, alloced_pmcs, i;
2190277177Srrs	struct expression *at;
2191277177Srrs	char **vars, *cmd;
2192277177Srrs	size_t mal;
2193277177Srrs
2194277177Srrs	alloced_pmcs = cnt_pmc = 0;
2195277177Srrs	/* first how many do we have */
2196277177Srrs	at = exp;
2197277177Srrs	while (at) {
2198277177Srrs		if (at->type == TYPE_VALUE_PMC) {
2199277177Srrs			cnt_pmc++;
2200277177Srrs		}
2201277177Srrs		at = at->next;
2202277177Srrs	}
2203277177Srrs	if (cnt_pmc == 0) {
2204277177Srrs		printf("No PMC's in your expression -- nothing to do!!\n");
2205277177Srrs		exit(0);
2206277177Srrs	}
2207277177Srrs	mal = cnt_pmc * sizeof(char *);
2208277177Srrs	vars = malloc(mal);
2209277177Srrs	if (vars == NULL) {
2210277177Srrs		printf("No memory\n");
2211277177Srrs		exit(-1);
2212277177Srrs	}
2213277177Srrs	memset(vars, 0, mal);
2214277177Srrs	at = exp;
2215277177Srrs	while (at) {
2216277177Srrs		if (at->type == TYPE_VALUE_PMC) {
2217277177Srrs			if(add_it_to(vars, alloced_pmcs, at->name)) {
2218277177Srrs				alloced_pmcs++;
2219277177Srrs			}
2220277177Srrs		}
2221277177Srrs		at = at->next;
2222277177Srrs	}
2223277177Srrs	/* Now we have a unique list in vars so create our command */
2224277177Srrs	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2225277177Srrs	for(i=0; i<alloced_pmcs; i++) {
2226277177Srrs		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2227277177Srrs	}
2228277177Srrs	cmd = malloc((mal+2));
2229277177Srrs	if (cmd == NULL) {
2230277177Srrs		printf("%s out of mem\n", __FUNCTION__);
2231277177Srrs		exit(-1);
2232277177Srrs	}
2233277177Srrs	memset(cmd, 0, (mal+2));
2234277177Srrs	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2235277177Srrs	at = exp;
2236277177Srrs	for(i=0; i<alloced_pmcs; i++) {
2237277177Srrs		sprintf(forming, " -s %s", vars[i]);
2238277177Srrs		strcat(cmd, forming);
2239277177Srrs		free(vars[i]);
2240277177Srrs		vars[i] = NULL;
2241277177Srrs	}
2242277177Srrs	free(vars);
2243277177Srrs	return(cmd);
2244277177Srrs}
2245277177Srrs
2246277177Srrsstatic int
2247277177Srrsuser_expr(struct counters *cpu, int pos)
2248277177Srrs{
2249277177Srrs	int ret;
2250277177Srrs	double res;
2251277177Srrs	struct counters *var;
2252277177Srrs	struct expression *at;
2253277177Srrs
2254277177Srrs	at = master_exp;
2255277177Srrs	while (at) {
2256277177Srrs		if (at->type == TYPE_VALUE_PMC) {
2257277177Srrs			var = find_counter(cpu, at->name);
2258277177Srrs			if (var == NULL) {
2259277177Srrs				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2260277177Srrs				exit(-1);
2261277177Srrs			}
2262277177Srrs			if (pos != -1) {
2263277177Srrs				at->value = var->vals[pos] * 1.0;
2264277177Srrs			} else {
2265277177Srrs				at->value = var->sum * 1.0;
2266277177Srrs			}
2267277177Srrs		}
2268277177Srrs		at = at->next;
2269277177Srrs	}
2270277177Srrs	res = run_expr(master_exp, 1, NULL);
2271277177Srrs	ret = printf("%1.3f", res);
2272277177Srrs	return(ret);
2273277177Srrs}
2274277177Srrs
2275277177Srrs
2276277177Srrsstatic void
2277277177Srrsset_manual_exp(struct expression *exp)
2278277177Srrs{
2279277177Srrs	expression = user_expr;
2280277177Srrs	command = build_command_for_exp(exp);
2281277177Srrs	threshold = "User defined threshold";
2282277177Srrs}
2283277177Srrs
2284277177Srrsstatic void
2285277177Srrsrun_tests(void)
2286277177Srrs{
2287277177Srrs	int i, lenout;
2288277177Srrs	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2289277177Srrs	printf("------------------------------------------------------------------------\n");
2290277177Srrs	for(i=0; i<valid_pmc_cnt; i++) {
2291277177Srrs		lenout = printf("%s", valid_pmcs[i]);
2292277177Srrs		fflush(stdout);
2293277177Srrs		test_for_a_pmc(valid_pmcs[i], lenout);
2294277177Srrs	}
2295277177Srrs}
2296277177Srrsstatic void
2297277177Srrslist_all(void)
2298277177Srrs{
2299277177Srrs	int i, cnt, j;
2300277177Srrs	printf("PMC                                               Abbreviation\n");
2301277177Srrs	printf("--------------------------------------------------------------\n");
2302277177Srrs	for(i=0; i<valid_pmc_cnt; i++) {
2303277177Srrs		cnt = printf("%s", valid_pmcs[i]);
2304277177Srrs		for(j=cnt; j<52; j++) {
2305277177Srrs			printf(" ");
2306277177Srrs		}
2307277177Srrs		printf("%%%d\n", i);
2308277177Srrs	}
2309277177Srrs}
2310277177Srrs
2311277177Srrs
2312277177Srrsint
2313277177Srrsmain(int argc, char **argv)
2314277177Srrs{
2315277177Srrs	int i, j, cnt;
2316277177Srrs	char *filename=NULL;
2317277177Srrs	char *name=NULL;
2318277177Srrs	int help_only = 0;
2319277177Srrs	int test_mode = 0;
2320277177Srrs
2321277177Srrs	get_cpuid_set();
2322277177Srrs	memset(glob_cpu, 0, sizeof(glob_cpu));
2323277177Srrs	while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
2324277177Srrs		switch (i) {
2325277177Srrs		case 'L':
2326277177Srrs			list_all();
2327277177Srrs			return(0);
2328277177Srrs		case 'H':
2329277177Srrs			printf("**********************************\n");
2330277177Srrs			explain_all();
2331277177Srrs			printf("**********************************\n");
2332277177Srrs			return(0);
2333277177Srrs			break;
2334277177Srrs		case 'T':
2335277177Srrs			test_mode = 1;
2336277177Srrs			break;
2337277177Srrs		case 'E':
2338277177Srrs			master_exp = parse_expression(optarg);
2339277177Srrs			if (master_exp) {
2340277177Srrs				set_manual_exp(master_exp);
2341277177Srrs			}
2342277177Srrs			break;
2343277177Srrs		case 'e':
2344277177Srrs			if (validate_expression(optarg)) {
2345277177Srrs				printf("Unknown expression %s\n", optarg);
2346277177Srrs				return(0);
2347277177Srrs			}
2348277177Srrs			name = optarg;
2349277177Srrs			set_expression(optarg);
2350277177Srrs			break;
2351277177Srrs		case 'm':
2352277177Srrs			max_to_collect = strtol(optarg, NULL, 0);
2353277177Srrs			if (max_to_collect > MAX_COUNTER_SLOTS) {
2354277177Srrs				/* You can't collect more than max in array */
2355277177Srrs				max_to_collect = MAX_COUNTER_SLOTS;
2356277177Srrs			}
2357277177Srrs			break;
2358277177Srrs		case 'v':
2359277177Srrs			verbose++;
2360277177Srrs			break;
2361277177Srrs		case 'h':
2362277177Srrs			help_only = 1;
2363277177Srrs			break;
2364277177Srrs		case 'i':
2365277177Srrs			filename = optarg;
2366277177Srrs			break;
2367277177Srrs		case '?':
2368277177Srrs		default:
2369277177Srrs		use:
2370277177Srrs			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2371277177Srrs			       argv[0]);
2372277177Srrs			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2373277177Srrs			printf("-v -- verbose dump debug type things -- you don't want this\n");
2374277177Srrs			printf("-m N -- maximum to collect is N measurments\n");
2375277177Srrs			printf("-e expr-name -- Do expression expr-name\n");
2376277177Srrs			printf("-E 'your expression' -- Do your expression\n");
2377277177Srrs			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2378277177Srrs			printf("-H -- Don't run anything, just explain all canned expressions\n");
2379277177Srrs			printf("-T -- Test all PMC's defined by this processor\n");
2380277177Srrs			return(0);
2381277177Srrs			break;
2382277177Srrs		};
2383277177Srrs	}
2384277177Srrs	if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
2385277177Srrs		printf("Without setting an expression we cannot dynamically gather information\n");
2386277177Srrs		printf("you must supply a filename (and you probably want verbosity)\n");
2387277177Srrs		goto use;
2388277177Srrs	}
2389277177Srrs	if (test_mode) {
2390277177Srrs		run_tests();
2391277177Srrs		return(0);
2392277177Srrs	}
2393277177Srrs	printf("*********************************\n");
2394277177Srrs	if (master_exp == NULL) {
2395277177Srrs		(*the_cpu.explain)(name);
2396277177Srrs	} else {
2397277177Srrs		printf("Examine your expression ");
2398277177Srrs		print_exp(master_exp);
2399277177Srrs		printf("User defined threshold\n");
2400277177Srrs	}
2401277177Srrs	if (help_only) {
2402277177Srrs		return(0);
2403277177Srrs	}
2404277177Srrs	process_file(filename);
2405277177Srrs	if (verbose >= 2) {
2406277177Srrs		for (i=0; i<ncnts; i++) {
2407277177Srrs			printf("Counter:%s cpu:%d index:%d\n",
2408277177Srrs			       cnts[i].counter_name,
2409277177Srrs			       cnts[i].cpu, i);
2410277177Srrs			for(j=0; j<cnts[i].pos; j++) {
2411277177Srrs				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2412277177Srrs			}
2413277177Srrs			printf(" sum - %ld\n", (long int)cnts[i].sum);
2414277177Srrs		}
2415277177Srrs	}
2416277177Srrs	if (expression == NULL) {
2417277177Srrs		return(0);
2418277177Srrs	}
2419277177Srrs	for(i=0, cnt=0; i<MAX_CPU; i++) {
2420277177Srrs		if (glob_cpu[i]) {
2421277177Srrs			do_expression(glob_cpu[i], -1);
2422277177Srrs			cnt++;
2423277177Srrs			if (cnt == cpu_count_out) {
2424277177Srrs				printf("\n");
2425277177Srrs				break;
2426277177Srrs			} else {
2427277177Srrs				printf("\t");
2428277177Srrs			}
2429277177Srrs		}
2430277177Srrs	}
2431277177Srrs	return(0);
2432277177Srrs}
2433