1/*-
2 * Copyright (c) 2014-2015 Netflix, Inc.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer,
9 *    in this position and unchanged.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 *    derived from this software without specific prior written permission
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27#include <sys/types.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <unistd.h>
31#include <string.h>
32#include <strings.h>
33#include <sys/errno.h>
34#include <signal.h>
35#include <sys/wait.h>
36#include <getopt.h>
37#include "eval_expr.h"
38__FBSDID("$FreeBSD$");
39
/* NOTE(review): the code that reads these two settings is outside this view. */
static int max_pmc_counters = 1;
static int run_all = 0;

/* Number of sample slots in the vals[] array of struct counters. */
#define MAX_COUNTER_SLOTS 1024
/* Size of the counter_name[] buffer in struct counters. */
#define MAX_NLEN 64
/* Number of entries in the glob_cpu[] table. */
#define MAX_CPU 64
/* NOTE(review): presumably a verbosity level; its users are not visible here. */
static int verbose = 0;

extern char **environ;
/*
 * Each non-static global below is declared extern immediately before it is
 * defined.
 */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

/*
 * NOTE(review): the three variables below look like a growable array of PMC
 * names (pointer, entries used, entries allocated) with PMC_INITIAL_ALLOC as
 * the starting size -- confirm against the code that fills them.
 */
#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
59
/*
 * my_popen() and my_pclose() are variants of popen(3) and pclose(3), with
 * the caveat that they hand you the child's PID so that you can supply it
 * to my_pclose(), which sends the process a SIGTERM.
 *
 * my_popen --
 *	Run "command" through "/bin/sh -c" with its stdin, stdout and stderr
 *	attached to pipes.  "dir" must be "r" (the returned stream reads the
 *	child's stdout and stderr) or "w" (the returned stream feeds the
 *	child's stdin).  On success the child's PID is stored in *p_pid.
 *
 *	Returns NULL on failure.  errno is EINVAL for a bad "dir", E2BIG for
 *	a command that does not fit the internal buffer, and otherwise
 *	whatever pipe(2), fork(2) or fdopen(3) reported.  *p_pid is written
 *	only on success.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int reading, keep_fd, saved_errno;
	char *argv[4];
	pid_t pid;
	char cmd[4];
	char cmd2[1024];
	char arg1[4];

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	/* Refuse, rather than overflow cmd2, when the command is too long. */
	if (strlen(command) >= sizeof(cmd2)) {
		errno = E2BIG;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}
	/* argv[] needs writable strings, so build them in local buffers. */
	strcpy(cmd, "sh");
	strcpy(arg1, "-c");
	strcpy(cmd2, command);
	argv[0] = cmd;
	argv[1] = arg1;
	argv[2] = cmd2;
	argv[3] = NULL;

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close the unused sides. */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/*
		 * Wire up stdin.  dup2() is used instead of close()+dup() so
		 * the result does not depend on which descriptor numbers
		 * happen to be free.
		 */
		if (pdesin[0] != STDIN_FILENO) {
			(void)dup2(pdesin[0], STDIN_FILENO);
			(void)close(pdesin[0]);
		}
		/* stdout, and stderr just in case, share the output pipe. */
		(void)dup2(pdesout[1], STDOUT_FILENO);
		(void)dup2(pdesout[1], STDERR_FILENO);
		/*
		 * pdesout[] is created after pdesin[], so pdesout[1] is at
		 * least 3 and can never alias a standard descriptor.
		 */
		(void)close(pdesout[1]);
		/* execv() passes the current environ, as execve() did. */
		execv("/bin/sh", argv);
		/*
		 * _exit(), not exit(): the child must not flush stdio
		 * buffers it inherited from the parent.
		 */
		_exit(127);
		/* NOTREACHED */
	}
	/* Parent: keep one pipe end, close the other three. */
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		io = fdopen(keep_fd, "r");
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		io = fdopen(keep_fd, "w");
	}
	if (io == NULL) {
		/*
		 * The caller never learns the PID on this path, so the child
		 * has to be torn down here.
		 */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	/* Store the pid */
	*p_pid = pid;
	return (io);
}
144
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream, send SIGTERM to the
 *	child identified by the_pid and wait for it.  Unlike pclose(3)
 *	nothing is returned; the child's exit status is discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int pstat;
	pid_t pid;

	/* Tolerate the NULL stream that a failed my_popen() returns. */
	if (io != NULL)
		(void)fclose(io);
	/*
	 * kill(2) treats zero and negative PIDs as process-group or
	 * broadcast targets, so only a real child PID may be signalled.
	 */
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	/* Reap the child, retrying when the wait is interrupted. */
	do {
		pid = waitpid(the_pid, &pstat, 0);
	} while (pid == -1 && errno == EINTR);
}
166
/*
 * One named counter and its collected samples.  Entries are chained through
 * next_cpu; find_counter() walks that chain looking for a counter_name.
 */
struct counters {
	struct counters *next_cpu;		/* Next entry in the chain */
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Sample slots, indexed by position */
	uint64_t sum;				/* Summary of entries */
};
175
/* NOTE(review): presumably one counter chain per CPU, indexed by CPU number -- confirm. */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/* NOTE(review): looks like the backing store for the counter entries, with ncnts as its count -- confirm. */
extern struct counters *cnts;
struct counters *cnts=NULL;

extern int ncnts;
int ncnts=0;

/*
 * Pointer to a function with the same signature as the metric routines in
 * this file (for example allocstall1()).  Where it is assigned is not
 * visible from here.
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

/* NOTE(review): these appear to parallel the thresh and command fields of struct cpu_entry -- confirm. */
static const char *threshold=NULL;
static const char *command;
190
/*
 * Description of one test.  The field meanings below are inferred from their
 * names and types; the tables that populate this structure are outside this
 * view, so treat them as assumptions to confirm.
 */
struct cpu_entry {
	const char *name;			/* presumably the test name, as taken by the explain_name_*() routines */
	const char *thresh;			/* presumably the threshold text for the test */
	const char *command;			/* presumably the command run to collect the counters */
	int (*func)(struct counters *, int);	/* same signature as the metric routines, e.g. allocstall1() */
	int counters_required;			/* presumably how many counters the test needs */
};
198
/*
 * Description of one CPU family.  explain has the same signature as the
 * explain_name_*() routines in this file.
 */
struct cpu_type {
	char cputype[32];			/* CPU type string */
	int number;				/* NOTE(review): presumably the number of entries in ents -- confirm */
	struct cpu_entry *ents;			/* Test table for this CPU type */
	void (*explain)(const char *name);	/* Prints the explanation for a named test */
};
/* NOTE(review): presumably filled in for the CPU being studied; the code that sets it is not visible here. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
207
/*
 * explain_name_sb --
 *	Print, on stdout, the formula behind the test called "name" followed
 *	by a sentence giving the threshold at which the result is worth
 *	acting on.  An unrecognized name produces "Unknown name:" and an
 *	"unknown entry" threshold instead.
 *
 *	NOTE(review): "sb" presumably stands for Sandy Bridge -- confirm
 *	against the table that installs this routine.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *name;	/* test name to match */
		const char *formula;	/* text printed for the test */
		const char *thresh;	/* threshold wording */
	} tests[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this message. */
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t ntests = sizeof(tests) / sizeof(tests[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < ntests; i++) {
		if (strcmp(name, tests[i].name) == 0)
			break;
	}
	if (i < ntests) {
		fputs(tests[i].formula, stdout);
		mythresh = tests[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
286
/*
 * explain_name_ib --
 *	Print, on stdout, the formula behind the test called "name" followed
 *	by a sentence giving the threshold at which the result is worth
 *	acting on.  An unrecognized name produces "Unknown name:" and an
 *	"unknown entry" threshold instead.
 *
 *	NOTE(review): "ib" presumably stands for Ivy Bridge -- confirm
 *	against the table that installs this routine.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *name;	/* test name to match */
		const char *formula;	/* text printed for the test */
		const char *thresh;	/* threshold wording */
	} tests[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t ntests = sizeof(tests) / sizeof(tests[0]);
	const char *verdict = "unknown entry";
	size_t idx;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (idx = 0; idx < ntests; idx++) {
		if (strcmp(name, tests[idx].name) == 0)
			break;
	}
	if (idx < ntests) {
		fputs(tests[idx].formula, stdout);
		verdict = tests[idx].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", verdict);
}
368
369
/*
 * explain_name_has --
 *	Print, on stdout, the formula behind the test called "name" followed
 *	by a sentence giving the threshold at which the result is worth
 *	acting on.  An unrecognized name produces "Unknown name:" and an
 *	"unknown entry" threshold instead.
 *
 *	NOTE(review): "has" presumably stands for Haswell -- confirm against
 *	the table that installs this routine.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *name;	/* test name to match */
		const char *formula;	/* text printed for the test */
		const char *thresh;	/* threshold wording */
	} tests[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t ntests = sizeof(tests) / sizeof(tests[0]);
	const char *verdict = "unknown entry";
	size_t idx;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (idx = 0; idx < ntests; idx++) {
		if (strcmp(name, tests[idx].name) == 0)
			break;
	}
	if (idx < ntests) {
		fputs(tests[idx].formula, stdout);
		verdict = tests[idx].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", verdict);
}
447
448
449
450static struct counters *
451find_counter(struct counters *base, const char *name)
452{
453	struct counters *at;
454	int len;
455
456	at = base;
457	len = strlen(name);
458	while(at) {
459		if (strncmp(at->counter_name, name, len) == 0) {
460			return(at);
461		}
462		at = at->next_cpu;
463	}
464	printf("Can't find counter %s\n", name);
465	printf("We have:\n");
466	at = base;
467	while(at) {
468		printf("- %s\n", at->counter_name);
469		at = at->next_cpu;
470	}
471	exit(-1);
472}
473
474static int
475allocstall1(struct counters *cpu, int pos)
476{
477/*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
478	int ret;
479	struct counters *partial;
480	struct counters *unhalt;
481	double un, par, res;
482	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
483	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
484	if (pos != -1) {
485		par = partial->vals[pos] * 1.0;
486		un = unhalt->vals[pos] * 1.0;
487	} else {
488		par = partial->sum * 1.0;
489		un = unhalt->sum * 1.0;
490	}
491	res = par/un;
492	ret = printf("%1.3f", res);
493	return(ret);
494}
495
496static int
497allocstall2(struct counters *cpu, int pos)
498{
499/*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
500	int ret;
501	struct counters *partial;
502	struct counters *unhalt;
503	double un, par, res;
504	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
505	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
506	if (pos != -1) {
507		par = partial->vals[pos] * 1.0;
508		un = unhalt->vals[pos] * 1.0;
509	} else {
510		par = partial->sum * 1.0;
511		un = unhalt->sum * 1.0;
512	}
513	res = par/un;
514	ret = printf("%1.3f", res);
515	return(ret);
516}
517
518static int
519br_mispredict(struct counters *cpu, int pos)
520{
521	struct counters *brctr;
522	struct counters *unhalt;
523	int ret;
524/*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
525	double br, un, con, res;
526	con = 20.0;
527
528	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
529        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
530	if (pos != -1) {
531		br = brctr->vals[pos] * 1.0;
532		un = unhalt->vals[pos] * 1.0;
533	} else {
534		br = brctr->sum * 1.0;
535		un = unhalt->sum * 1.0;
536	}
537	res = (con * br)/un;
538 	ret = printf("%1.3f", res);
539	return(ret);
540}
541
542static int
543br_mispredictib(struct counters *cpu, int pos)
544{
545	struct counters *brctr;
546	struct counters *unhalt;
547	struct counters *clear, *clear2, *clear3;
548	struct counters *uops;
549	struct counters *recv;
550	struct counters *iss;
551/*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
552	int ret;
553        /*
554	 * (BR_MISP_RETIRED.ALL_BRANCHES /
555	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
556	 *          MACHINE_CLEAR.COUNT) *
557	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
558	 *
559	 */
560	double br, cl, cl2, cl3, uo, re, un, con, res, is;
561	con = 4.0;
562
563	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
564        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
565	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
566	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
567	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
568	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
569	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
570	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
571	if (pos != -1) {
572		br = brctr->vals[pos] * 1.0;
573		cl = clear->vals[pos] * 1.0;
574		cl2 = clear2->vals[pos] * 1.0;
575		cl3 = clear3->vals[pos] * 1.0;
576		uo = uops->vals[pos] * 1.0;
577		re = recv->vals[pos] * 1.0;
578		is = iss->vals[pos] * 1.0;
579		un = unhalt->vals[pos] * 1.0;
580	} else {
581		br = brctr->sum * 1.0;
582		cl = clear->sum * 1.0;
583		cl2 = clear2->sum * 1.0;
584		cl3 = clear3->sum * 1.0;
585		uo = uops->sum * 1.0;
586		re = recv->sum * 1.0;
587		is = iss->sum * 1.0;
588		un = unhalt->sum * 1.0;
589	}
590	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
591 	ret = printf("%1.3f", res);
592	return(ret);
593}
594
595
596static int
597br_mispredict_broad(struct counters *cpu, int pos)
598{
599	struct counters *brctr;
600	struct counters *unhalt;
601	struct counters *clear;
602	struct counters *uops;
603	struct counters *uops_ret;
604	struct counters *recv;
605	int ret;
606	double br, cl, uo, uo_r, re, con, un, res;
607
608	con = 4.0;
609
610	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
611        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
612	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
613	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
614	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
615	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
616
617	if (pos != -1) {
618		un = unhalt->vals[pos] * 1.0;
619		br = brctr->vals[pos] * 1.0;
620		cl = clear->vals[pos] * 1.0;
621		uo = uops->vals[pos] * 1.0;
622		uo_r = uops_ret->vals[pos] * 1.0;
623		re = recv->vals[pos] * 1.0;
624	} else {
625		un = unhalt->sum * 1.0;
626		br = brctr->sum * 1.0;
627		cl = clear->sum * 1.0;
628		uo = uops->sum * 1.0;
629		uo_r = uops_ret->sum * 1.0;
630		re = recv->sum * 1.0;
631	}
632	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
633 	ret = printf("%1.3f", res);
634	return(ret);
635}
636
637static int
638splitloadib(struct counters *cpu, int pos)
639{
640	int ret;
641	struct counters *mem;
642	struct counters *l1d, *ldblock;
643	struct counters *unhalt;
644	double un, memd, res, l1, ldb;
645        /*
646	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
647	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
648	 */
649
650	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
651	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
652	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
653	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
654	if (pos != -1) {
655		memd = mem->vals[pos] * 1.0;
656		l1 = l1d->vals[pos] * 1.0;
657		ldb = ldblock->vals[pos] * 1.0;
658		un = unhalt->vals[pos] * 1.0;
659	} else {
660		memd = mem->sum * 1.0;
661		l1 = l1d->sum * 1.0;
662		ldb = ldblock->sum * 1.0;
663		un = unhalt->sum * 1.0;
664	}
665	res = ((l1 / memd) * ldb)/un;
666	ret = printf("%1.3f", res);
667	return(ret);
668}
669
670
671static int
672splitload(struct counters *cpu, int pos)
673{
674	int ret;
675	struct counters *mem;
676	struct counters *unhalt;
677	double con, un, memd, res;
678/*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
679
680	con = 5.0;
681	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
682	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
683	if (pos != -1) {
684		memd = mem->vals[pos] * 1.0;
685		un = unhalt->vals[pos] * 1.0;
686	} else {
687		memd = mem->sum * 1.0;
688		un = unhalt->sum * 1.0;
689	}
690	res = (memd * con)/un;
691	ret = printf("%1.3f", res);
692	return(ret);
693}
694
695
696static int
697splitload_sb(struct counters *cpu, int pos)
698{
699	int ret;
700	struct counters *mem;
701	struct counters *unhalt;
702	double con, un, memd, res;
703/*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
704
705	con = 5.0;
706	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
708	if (pos != -1) {
709		memd = mem->vals[pos] * 1.0;
710		un = unhalt->vals[pos] * 1.0;
711	} else {
712		memd = mem->sum * 1.0;
713		un = unhalt->sum * 1.0;
714	}
715	res = (memd * con)/un;
716	ret = printf("%1.3f", res);
717	return(ret);
718}
719
720
721static int
722splitstore_sb(struct counters *cpu, int pos)
723{
724        /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
725	int ret;
726	struct counters *mem_split;
727	struct counters *mem_stores;
728	double memsplit, memstore, res;
729	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
730	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
731	if (pos != -1) {
732		memsplit = mem_split->vals[pos] * 1.0;
733		memstore = mem_stores->vals[pos] * 1.0;
734	} else {
735		memsplit = mem_split->sum * 1.0;
736		memstore = mem_stores->sum * 1.0;
737	}
738	res = memsplit/memstore;
739	ret = printf("%1.3f", res);
740	return(ret);
741}
742
743
744
745static int
746splitstore(struct counters *cpu, int pos)
747{
748        /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
749	int ret;
750	struct counters *mem_split;
751	struct counters *mem_stores;
752	double memsplit, memstore, res;
753	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
754	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
755	if (pos != -1) {
756		memsplit = mem_split->vals[pos] * 1.0;
757		memstore = mem_stores->vals[pos] * 1.0;
758	} else {
759		memsplit = mem_split->sum * 1.0;
760		memstore = mem_stores->sum * 1.0;
761	}
762	res = memsplit/memstore;
763	ret = printf("%1.3f", res);
764	return(ret);
765}
766
767
768static int
769contested(struct counters *cpu, int pos)
770{
771        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
772	int ret;
773	struct counters *mem;
774	struct counters *unhalt;
775	double con, un, memd, res;
776
777	con = 60.0;
778	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
779	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
780	if (pos != -1) {
781		memd = mem->vals[pos] * 1.0;
782		un = unhalt->vals[pos] * 1.0;
783	} else {
784		memd = mem->sum * 1.0;
785		un = unhalt->sum * 1.0;
786	}
787	res = (memd * con)/un;
788	ret = printf("%1.3f", res);
789	return(ret);
790}
791
792static int
793contested_has(struct counters *cpu, int pos)
794{
795        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
796	int ret;
797	struct counters *mem;
798	struct counters *unhalt;
799	double con, un, memd, res;
800
801	con = 84.0;
802	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
803	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
804	if (pos != -1) {
805		memd = mem->vals[pos] * 1.0;
806		un = unhalt->vals[pos] * 1.0;
807	} else {
808		memd = mem->sum * 1.0;
809		un = unhalt->sum * 1.0;
810	}
811	res = (memd * con)/un;
812	ret = printf("%1.3f", res);
813	return(ret);
814}
815
816static int
817contestedbroad(struct counters *cpu, int pos)
818{
819        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
820	int ret;
821	struct counters *mem;
822	struct counters *mem2;
823	struct counters *unhalt;
824	double con, un, memd, memtoo, res;
825
826	con = 84.0;
827	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
828	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
829	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
830
831	if (pos != -1) {
832		memd = mem->vals[pos] * 1.0;
833		memtoo = mem2->vals[pos] * 1.0;
834		un = unhalt->vals[pos] * 1.0;
835	} else {
836		memd = mem->sum * 1.0;
837		memtoo = mem2->sum * 1.0;
838		un = unhalt->sum * 1.0;
839	}
840	res = ((memd * con) + memtoo)/un;
841	ret = printf("%1.3f", res);
842	return(ret);
843}
844
845
846static int
847blockstoreforward(struct counters *cpu, int pos)
848{
849        /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
850	int ret;
851	struct counters *ldb;
852	struct counters *unhalt;
853	double con, un, ld, res;
854
855	con = 13.0;
856	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
857	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
858	if (pos != -1) {
859		ld = ldb->vals[pos] * 1.0;
860		un = unhalt->vals[pos] * 1.0;
861	} else {
862		ld = ldb->sum * 1.0;
863		un = unhalt->sum * 1.0;
864	}
865	res = (ld * con)/un;
866	ret = printf("%1.3f", res);
867	return(ret);
868}
869
870static int
871cache2(struct counters *cpu, int pos)
872{
873	/* ** Suspect ***
874	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
875	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
876	 */
877	int ret;
878	struct counters *mem1, *mem2, *mem3;
879	struct counters *unhalt;
880	double con1, con2, con3, un, me_1, me_2, me_3, res;
881
882	con1 = 26.0;
883	con2 = 43.0;
884	con3 = 60.0;
885	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
886/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
887	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
888	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
889	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
890	if (pos != -1) {
891		me_1 = mem1->vals[pos] * 1.0;
892		me_2 = mem2->vals[pos] * 1.0;
893		me_3 = mem3->vals[pos] * 1.0;
894		un = unhalt->vals[pos] * 1.0;
895	} else {
896		me_1 = mem1->sum * 1.0;
897		me_2 = mem2->sum * 1.0;
898		me_3 = mem3->sum * 1.0;
899		un = unhalt->sum * 1.0;
900	}
901	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
902	ret = printf("%1.3f", res);
903	return(ret);
904}
905
906static int
907datasharing(struct counters *cpu, int pos)
908{
909	/*
910	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
911	 */
912	int ret;
913	struct counters *mem;
914	struct counters *unhalt;
915	double con, res, me, un;
916
917	con = 43.0;
918	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
919	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
920	if (pos != -1) {
921		me = mem->vals[pos] * 1.0;
922		un = unhalt->vals[pos] * 1.0;
923	} else {
924		me = mem->sum * 1.0;
925		un = unhalt->sum * 1.0;
926	}
927	res = (me * con)/un;
928	ret = printf("%1.3f", res);
929	return(ret);
930
931}
932
933
934static int
935datasharing_has(struct counters *cpu, int pos)
936{
937	/*
938	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
939	 */
940	int ret;
941	struct counters *mem;
942	struct counters *unhalt;
943	double con, res, me, un;
944
945	con = 72.0;
946	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
947	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
948	if (pos != -1) {
949		me = mem->vals[pos] * 1.0;
950		un = unhalt->vals[pos] * 1.0;
951	} else {
952		me = mem->sum * 1.0;
953		un = unhalt->sum * 1.0;
954	}
955	res = (me * con)/un;
956	ret = printf("%1.3f", res);
957	return(ret);
958
959}
960
961
962static int
963cache2ib(struct counters *cpu, int pos)
964{
965        /*
966	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
967	 */
968	int ret;
969	struct counters *mem;
970	struct counters *unhalt;
971	double con, un, me, res;
972
973	con = 29.0;
974	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
975	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
976	if (pos != -1) {
977		me = mem->vals[pos] * 1.0;
978		un = unhalt->vals[pos] * 1.0;
979	} else {
980		me = mem->sum * 1.0;
981		un = unhalt->sum * 1.0;
982	}
983	res = (con * me)/un;
984	ret = printf("%1.3f", res);
985	return(ret);
986}
987
988static int
989cache2has(struct counters *cpu, int pos)
990{
991	/*
992	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
993	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
994	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
995	 *           / CPU_CLK_UNHALTED.THREAD_P
996	 */
997	int ret;
998	struct counters *mem1, *mem2, *mem3;
999	struct counters *unhalt;
1000	double con1, con2, con3, un, me1, me2, me3, res;
1001
1002	con1 = 36.0;
1003	con2 = 72.0;
1004	con3 = 84.0;
1005	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1006	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1007	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1008	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1009	if (pos != -1) {
1010		me1 = mem1->vals[pos] * 1.0;
1011		me2 = mem2->vals[pos] * 1.0;
1012		me3 = mem3->vals[pos] * 1.0;
1013		un = unhalt->vals[pos] * 1.0;
1014	} else {
1015		me1 = mem1->sum * 1.0;
1016		me2 = mem2->sum * 1.0;
1017		me3 = mem3->sum * 1.0;
1018		un = unhalt->sum * 1.0;
1019	}
1020	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1021	ret = printf("%1.3f", res);
1022	return(ret);
1023}
1024
1025
1026static int
1027cache2broad(struct counters *cpu, int pos)
1028{
1029        /*
1030	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1031	 */
1032	int ret;
1033	struct counters *mem;
1034	struct counters *unhalt;
1035	double con, un, me, res;
1036
1037	con = 36.0;
1038	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1039	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1040	if (pos != -1) {
1041		me = mem->vals[pos] * 1.0;
1042		un = unhalt->vals[pos] * 1.0;
1043	} else {
1044		me = mem->sum * 1.0;
1045		un = unhalt->sum * 1.0;
1046	}
1047	res = (con * me)/un;
1048	ret = printf("%1.3f", res);
1049	return(ret);
1050}
1051
1052
1053static int
1054cache1(struct counters *cpu, int pos)
1055{
1056	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1057	int ret;
1058	struct counters *mem;
1059	struct counters *unhalt;
1060	double con, un, me, res;
1061
1062	con = 180.0;
1063	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1064	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1065	if (pos != -1) {
1066		me = mem->vals[pos] * 1.0;
1067		un = unhalt->vals[pos] * 1.0;
1068	} else {
1069		me = mem->sum * 1.0;
1070		un = unhalt->sum * 1.0;
1071	}
1072	res = (me * con)/un;
1073	ret = printf("%1.3f", res);
1074	return(ret);
1075}
1076
1077static int
1078cache1ib(struct counters *cpu, int pos)
1079{
1080	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1081	int ret;
1082	struct counters *mem;
1083	struct counters *unhalt;
1084	double con, un, me, res;
1085
1086	con = 180.0;
1087	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1088	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1089	if (pos != -1) {
1090		me = mem->vals[pos] * 1.0;
1091		un = unhalt->vals[pos] * 1.0;
1092	} else {
1093		me = mem->sum * 1.0;
1094		un = unhalt->sum * 1.0;
1095	}
1096	res = (me * con)/un;
1097	ret = printf("%1.3f", res);
1098	return(ret);
1099}
1100
1101
1102static int
1103cache1broad(struct counters *cpu, int pos)
1104{
1105	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1106	int ret;
1107	struct counters *mem;
1108	struct counters *unhalt;
1109	double con, un, me, res;
1110
1111	con = 180.0;
1112	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1113	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1114	if (pos != -1) {
1115		me = mem->vals[pos] * 1.0;
1116		un = unhalt->vals[pos] * 1.0;
1117	} else {
1118		me = mem->sum * 1.0;
1119		un = unhalt->sum * 1.0;
1120	}
1121	res = (me * con)/un;
1122	ret = printf("%1.3f", res);
1123	return(ret);
1124}
1125
1126
1127static int
1128dtlb_missload(struct counters *cpu, int pos)
1129{
1130	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1131	int ret;
1132	struct counters *dtlb_m, *dtlb_d;
1133	struct counters *unhalt;
1134	double con, un, d1, d2, res;
1135
1136	con = 7.0;
1137	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1138	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1139	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1140	if (pos != -1) {
1141		d1 = dtlb_m->vals[pos] * 1.0;
1142		d2 = dtlb_d->vals[pos] * 1.0;
1143		un = unhalt->vals[pos] * 1.0;
1144	} else {
1145		d1 = dtlb_m->sum * 1.0;
1146		d2 = dtlb_d->sum * 1.0;
1147		un = unhalt->sum * 1.0;
1148	}
1149	res = ((d1 * con) + d2)/un;
1150	ret = printf("%1.3f", res);
1151	return(ret);
1152}
1153
1154static int
1155dtlb_missstore(struct counters *cpu, int pos)
1156{
1157        /*
1158	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1159	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1160	 */
1161        int ret;
1162        struct counters *dtsb_m, *dtsb_d;
1163        struct counters *unhalt;
1164        double con, un, d1, d2, res;
1165
1166        con = 7.0;
1167        unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1168        dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1169        dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1170        if (pos != -1) {
1171                d1 = dtsb_m->vals[pos] * 1.0;
1172                d2 = dtsb_d->vals[pos] * 1.0;
1173                un = unhalt->vals[pos] * 1.0;
1174        } else {
1175                d1 = dtsb_m->sum * 1.0;
1176                d2 = dtsb_d->sum * 1.0;
1177                un = unhalt->sum * 1.0;
1178        }
1179        res = ((d1 * con) + d2)/un;
1180        ret = printf("%1.3f", res);
1181        return(ret);
1182}
1183
1184static int
1185itlb_miss(struct counters *cpu, int pos)
1186{
1187	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1188	int ret;
1189	struct counters *itlb;
1190	struct counters *unhalt;
1191	double un, d1, res;
1192
1193	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1194	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1195	if (pos != -1) {
1196		d1 = itlb->vals[pos] * 1.0;
1197		un = unhalt->vals[pos] * 1.0;
1198	} else {
1199		d1 = itlb->sum * 1.0;
1200		un = unhalt->sum * 1.0;
1201	}
1202	res = d1/un;
1203	ret = printf("%1.3f", res);
1204	return(ret);
1205}
1206
1207
1208static int
1209itlb_miss_broad(struct counters *cpu, int pos)
1210{
1211	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1212	int ret;
1213	struct counters *itlb;
1214	struct counters *unhalt;
1215	struct counters *four_k;
1216	double un, d1, res, k;
1217
1218	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1219	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1220	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1221	if (pos != -1) {
1222		d1 = itlb->vals[pos] * 1.0;
1223		un = unhalt->vals[pos] * 1.0;
1224		k = four_k->vals[pos] * 1.0;
1225	} else {
1226		d1 = itlb->sum * 1.0;
1227		un = unhalt->sum * 1.0;
1228		k = four_k->sum * 1.0;
1229	}
1230	res = (7.0 * k + d1)/un;
1231	ret = printf("%1.3f", res);
1232	return(ret);
1233}
1234
1235
1236static int
1237icache_miss(struct counters *cpu, int pos)
1238{
1239	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1240
1241	int ret;
1242	struct counters *itlb, *icache;
1243	struct counters *unhalt;
1244	double un, d1, ic, res;
1245
1246	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1247	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1248	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1249	if (pos != -1) {
1250		d1 = itlb->vals[pos] * 1.0;
1251		ic = icache->vals[pos] * 1.0;
1252		un = unhalt->vals[pos] * 1.0;
1253	} else {
1254		d1 = itlb->sum * 1.0;
1255		ic = icache->sum * 1.0;
1256		un = unhalt->sum * 1.0;
1257	}
1258	res = (ic-d1)/un;
1259	ret = printf("%1.3f", res);
1260	return(ret);
1261
1262}
1263
1264static int
1265icache_miss_has(struct counters *cpu, int pos)
1266{
1267	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1268
1269	int ret;
1270	struct counters *icache;
1271	struct counters *unhalt;
1272	double un, con, ic, res;
1273
1274	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1275	icache = find_counter(cpu, "ICACHE.MISSES");
1276	con = 36.0;
1277	if (pos != -1) {
1278		ic = icache->vals[pos] * 1.0;
1279		un = unhalt->vals[pos] * 1.0;
1280	} else {
1281		ic = icache->sum * 1.0;
1282		un = unhalt->sum * 1.0;
1283	}
1284	res = (con * ic)/un;
1285	ret = printf("%1.3f", res);
1286	return(ret);
1287
1288}
1289
1290static int
1291lcp_stall(struct counters *cpu, int pos)
1292{
1293         /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1294	int ret;
1295	struct counters *ild;
1296	struct counters *unhalt;
1297	double un, d1, res;
1298
1299	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1300	ild = find_counter(cpu, "ILD_STALL.LCP");
1301	if (pos != -1) {
1302		d1 = ild->vals[pos] * 1.0;
1303		un = unhalt->vals[pos] * 1.0;
1304	} else {
1305		d1 = ild->sum * 1.0;
1306		un = unhalt->sum * 1.0;
1307	}
1308	res = d1/un;
1309	ret = printf("%1.3f", res);
1310	return(ret);
1311
1312}
1313
1314
1315static int
1316frontendstall(struct counters *cpu, int pos)
1317{
1318      /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1319	int ret;
1320	struct counters *idq;
1321	struct counters *unhalt;
1322	double con, un, id, res;
1323
1324	con = 4.0;
1325	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1326	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1327	if (pos != -1) {
1328		id = idq->vals[pos] * 1.0;
1329		un = unhalt->vals[pos] * 1.0;
1330	} else {
1331		id = idq->sum * 1.0;
1332		un = unhalt->sum * 1.0;
1333	}
1334	res = id/(un * con);
1335	ret = printf("%1.3f", res);
1336	return(ret);
1337}
1338
1339static int
1340clears(struct counters *cpu, int pos)
1341{
1342	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1343	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1344
1345	int ret;
1346	struct counters *clr1, *clr2, *clr3;
1347	struct counters *unhalt;
1348	double con, un, cl1, cl2, cl3, res;
1349
1350	con = 100.0;
1351	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1352	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1353	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1354	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1355
1356	if (pos != -1) {
1357		cl1 = clr1->vals[pos] * 1.0;
1358		cl2 = clr2->vals[pos] * 1.0;
1359		cl3 = clr3->vals[pos] * 1.0;
1360		un = unhalt->vals[pos] * 1.0;
1361	} else {
1362		cl1 = clr1->sum * 1.0;
1363		cl2 = clr2->sum * 1.0;
1364		cl3 = clr3->sum * 1.0;
1365		un = unhalt->sum * 1.0;
1366	}
1367	res = ((cl1 + cl2 + cl3) * con)/un;
1368	ret = printf("%1.3f", res);
1369	return(ret);
1370}
1371
1372
1373
1374static int
1375clears_broad(struct counters *cpu, int pos)
1376{
1377	int ret;
1378	struct counters *clr1, *clr2, *clr3, *cyc;
1379	struct counters *unhalt;
1380	double con, un, cl1, cl2, cl3, cy, res;
1381
1382	con = 100.0;
1383	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1384	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1385	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1386	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1387	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1388	if (pos != -1) {
1389		cl1 = clr1->vals[pos] * 1.0;
1390		cl2 = clr2->vals[pos] * 1.0;
1391		cl3 = clr3->vals[pos] * 1.0;
1392		cy = cyc->vals[pos] * 1.0;
1393		un = unhalt->vals[pos] * 1.0;
1394	} else {
1395		cl1 = clr1->sum * 1.0;
1396		cl2 = clr2->sum * 1.0;
1397		cl3 = clr3->sum * 1.0;
1398		cy = cyc->sum * 1.0;
1399		un = unhalt->sum * 1.0;
1400	}
1401	/* Formula not listed but extrapulated to add the cy ?? */
1402	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1403	ret = printf("%1.3f", res);
1404	return(ret);
1405}
1406
1407
1408
1409
1410
1411static int
1412microassist(struct counters *cpu, int pos)
1413{
1414	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1415	int ret;
1416	struct counters *idq;
1417	struct counters *unhalt;
1418	double un, id, res, con;
1419
1420	con = 4.0;
1421	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1422	idq = find_counter(cpu, "IDQ.MS_UOPS");
1423	if (pos != -1) {
1424		id = idq->vals[pos] * 1.0;
1425		un = unhalt->vals[pos] * 1.0;
1426	} else {
1427		id = idq->sum * 1.0;
1428		un = unhalt->sum * 1.0;
1429	}
1430	res = id/(un * con);
1431	ret = printf("%1.3f", res);
1432	return(ret);
1433}
1434
1435
1436static int
1437microassist_broad(struct counters *cpu, int pos)
1438{
1439	int ret;
1440	struct counters *idq;
1441	struct counters *unhalt;
1442	struct counters *uopiss;
1443	struct counters *uopret;
1444	double un, id, res, con, uoi, uor;
1445
1446	con = 4.0;
1447	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1448	idq = find_counter(cpu, "IDQ.MS_UOPS");
1449	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1450	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1451	if (pos != -1) {
1452		id = idq->vals[pos] * 1.0;
1453		un = unhalt->vals[pos] * 1.0;
1454		uoi = uopiss->vals[pos] * 1.0;
1455		uor = uopret->vals[pos] * 1.0;
1456	} else {
1457		id = idq->sum * 1.0;
1458		un = unhalt->sum * 1.0;
1459		uoi = uopiss->sum * 1.0;
1460		uor = uopret->sum * 1.0;
1461	}
1462	res = (uor/uoi) * (id/(un * con));
1463	ret = printf("%1.3f", res);
1464	return(ret);
1465}
1466
1467
1468static int
1469aliasing(struct counters *cpu, int pos)
1470{
1471	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1472	int ret;
1473	struct counters *ld;
1474	struct counters *unhalt;
1475	double un, lds, con, res;
1476
1477	con = 5.0;
1478	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1479	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1480	if (pos != -1) {
1481		lds = ld->vals[pos] * 1.0;
1482		un = unhalt->vals[pos] * 1.0;
1483	} else {
1484		lds = ld->sum * 1.0;
1485		un = unhalt->sum * 1.0;
1486	}
1487	res = (lds * con)/un;
1488	ret = printf("%1.3f", res);
1489	return(ret);
1490}
1491
1492static int
1493aliasing_broad(struct counters *cpu, int pos)
1494{
1495	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1496	int ret;
1497	struct counters *ld;
1498	struct counters *unhalt;
1499	double un, lds, con, res;
1500
1501	con = 7.0;
1502	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1503	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1504	if (pos != -1) {
1505		lds = ld->vals[pos] * 1.0;
1506		un = unhalt->vals[pos] * 1.0;
1507	} else {
1508		lds = ld->sum * 1.0;
1509		un = unhalt->sum * 1.0;
1510	}
1511	res = (lds * con)/un;
1512	ret = printf("%1.3f", res);
1513	return(ret);
1514}
1515
1516
1517static int
1518fpassists(struct counters *cpu, int pos)
1519{
1520	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1521	int ret;
1522	struct counters *fp;
1523	struct counters *inst;
1524	double un, fpd, res;
1525
1526	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1527	fp = find_counter(cpu, "FP_ASSIST.ANY");
1528	if (pos != -1) {
1529		fpd = fp->vals[pos] * 1.0;
1530		un = inst->vals[pos] * 1.0;
1531	} else {
1532		fpd = fp->sum * 1.0;
1533		un = inst->sum * 1.0;
1534	}
1535	res = fpd/un;
1536	ret = printf("%1.3f", res);
1537	return(ret);
1538}
1539
1540static int
1541otherassistavx(struct counters *cpu, int pos)
1542{
1543	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1544	int ret;
1545	struct counters *oth;
1546	struct counters *unhalt;
1547	double un, ot, con, res;
1548
1549	con = 75.0;
1550	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1551	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1552	if (pos != -1) {
1553		ot = oth->vals[pos] * 1.0;
1554		un = unhalt->vals[pos] * 1.0;
1555	} else {
1556		ot = oth->sum * 1.0;
1557		un = unhalt->sum * 1.0;
1558	}
1559	res = (ot * con)/un;
1560	ret = printf("%1.3f", res);
1561	return(ret);
1562}
1563
1564static int
1565otherassistsse(struct counters *cpu, int pos)
1566{
1567
1568	int ret;
1569	struct counters *oth;
1570	struct counters *unhalt;
1571	double un, ot, con, res;
1572
1573	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1574	con = 75.0;
1575	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1576	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1577	if (pos != -1) {
1578		ot = oth->vals[pos] * 1.0;
1579		un = unhalt->vals[pos] * 1.0;
1580	} else {
1581		ot = oth->sum * 1.0;
1582		un = unhalt->sum * 1.0;
1583	}
1584	res = (ot * con)/un;
1585	ret = printf("%1.3f", res);
1586	return(ret);
1587}
1588
1589static int
1590efficiency1(struct counters *cpu, int pos)
1591{
1592
1593	int ret;
1594	struct counters *uops;
1595	struct counters *unhalt;
1596	double un, ot, con, res;
1597
1598        /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1599	con = 4.0;
1600	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1601	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1602	if (pos != -1) {
1603		ot = uops->vals[pos] * 1.0;
1604		un = unhalt->vals[pos] * 1.0;
1605	} else {
1606		ot = uops->sum * 1.0;
1607		un = unhalt->sum * 1.0;
1608	}
1609	res = ot/(con * un);
1610	ret = printf("%1.3f", res);
1611	return(ret);
1612}
1613
1614static int
1615efficiency2(struct counters *cpu, int pos)
1616{
1617
1618	int ret;
1619	struct counters *uops;
1620	struct counters *unhalt;
1621	double un, ot, res;
1622
1623        /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1624	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1625	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1626	if (pos != -1) {
1627		ot = uops->vals[pos] * 1.0;
1628		un = unhalt->vals[pos] * 1.0;
1629	} else {
1630		ot = uops->sum * 1.0;
1631		un = unhalt->sum * 1.0;
1632	}
1633	res = un/ot;
1634	ret = printf("%1.3f", res);
1635	return(ret);
1636}
1637
1638#define SANDY_BRIDGE_COUNT 20
1639static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1640/*01*/	{ "allocstall1", "thresh > .05",
1641	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1642	  allocstall1, 2 },
1643/* -- not defined for SB right (partial-rat_stalls) 02*/
1644        { "allocstall2", "thresh > .05",
1645	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1646	  allocstall2, 2 },
1647/*03*/	{ "br_miss", "thresh >= .2",
1648	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1649	  br_mispredict, 2 },
1650/*04*/	{ "splitload", "thresh >= .1",
1651	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1652	  splitload_sb, 2 },
1653/* 05*/	{ "splitstore", "thresh >= .01",
1654	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1655	  splitstore_sb, 2 },
1656/*06*/	{ "contested", "thresh >= .05",
1657	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1658	  contested, 2 },
1659/*07*/	{ "blockstorefwd", "thresh >= .05",
1660	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1661	  blockstoreforward, 2 },
1662/*08*/	{ "cache2", "thresh >= .2",
1663	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1664	  cache2, 4 },
1665/*09*/	{ "cache1", "thresh >= .2",
1666	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1667	  cache1, 2 },
1668/*10*/	{ "dtlbmissload", "thresh >= .1",
1669	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1670	  dtlb_missload, 3 },
1671/*11*/	{ "dtlbmissstore", "thresh >= .05",
1672	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1673	  dtlb_missstore, 3 },
1674/*12*/	{ "frontendstall", "thresh >= .15",
1675	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1676	  frontendstall, 2 },
1677/*13*/	{ "clears", "thresh >= .02",
1678	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1679	  clears, 4 },
1680/*14*/	{ "microassist", "thresh >= .05",
1681	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1682	  microassist, 2 },
1683/*15*/	{ "aliasing_4k", "thresh >= .1",
1684	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1685	  aliasing, 2 },
1686/*16*/	{ "fpassist", "look for a excessive value",
1687	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1688	  fpassists, 2 },
1689/*17*/	{ "otherassistavx", "look for a excessive value",
1690	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1691	  otherassistavx, 2},
1692/*18*/	{ "otherassistsse", "look for a excessive value",
1693	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1694	  otherassistsse, 2 },
1695/*19*/	{ "eff1", "thresh < .9",
1696	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1697	  efficiency1, 2 },
1698/*20*/	{ "eff2", "thresh > 1.0",
1699	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1700	  efficiency2, 2 },
1701};
1702
1703
1704#define IVY_BRIDGE_COUNT 21
1705static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1706/*1*/	{ "eff1", "thresh < .75",
1707	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1708	  efficiency1, 2 },
1709/*2*/	{ "eff2", "thresh > 1.0",
1710	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1711	  efficiency2, 2 },
1712/*3*/	{ "itlbmiss", "thresh > .05",
1713	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1714	  itlb_miss, 2 },
1715/*4*/	{ "icachemiss", "thresh > .05",
1716	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1717	  icache_miss, 3 },
1718/*5*/	{ "lcpstall", "thresh > .05",
1719	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1720	  lcp_stall, 2 },
1721/*6*/	{ "cache1", "thresh >= .2",
1722	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1723	  cache1ib, 2 },
1724/*7*/	{ "cache2", "thresh >= .2",
1725	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1726	  cache2ib, 2 },
1727/*8*/	{ "contested", "thresh >= .05",
1728	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1729	  contested, 2 },
1730/*9*/	{ "datashare", "thresh >= .05",
1731	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1732	  datasharing, 2 },
1733/*10*/	{ "blockstorefwd", "thresh >= .05",
1734	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1735	  blockstoreforward, 2 },
1736/*11*/	{ "splitload", "thresh >= .1",
1737	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1738	  splitloadib, 4 },
1739/*12*/	{ "splitstore", "thresh >= .01",
1740	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1741	  splitstore, 2 },
1742/*13*/	{ "aliasing_4k", "thresh >= .1",
1743	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1744	  aliasing, 2 },
1745/*14*/	{ "dtlbmissload", "thresh >= .1",
1746	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1747	  dtlb_missload , 3},
1748/*15*/	{ "dtlbmissstore", "thresh >= .05",
1749	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1750	  dtlb_missstore, 3 },
1751/*16*/	{ "br_miss", "thresh >= .2",
1752	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1753	  br_mispredictib, 8 },
1754/*17*/	{ "clears", "thresh >= .02",
1755	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1756	  clears, 4 },
1757/*18*/	{ "microassist", "thresh >= .05",
1758	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1759	  microassist, 2 },
1760/*19*/	{ "fpassist", "look for a excessive value",
1761	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1762	  fpassists, 2 },
1763/*20*/	{ "otherassistavx", "look for a excessive value",
1764	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1765	  otherassistavx , 2},
1766/*21*/	{ "otherassistsse", "look for a excessive value",
1767	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1768	  otherassistsse, 2 },
1769};
1770
1771#define HASWELL_COUNT 20
1772static struct cpu_entry haswell[HASWELL_COUNT] = {
1773/*1*/	{ "eff1", "thresh < .75",
1774	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1775	  efficiency1, 2 },
1776/*2*/	{ "eff2", "thresh > 1.0",
1777	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1778	  efficiency2, 2 },
1779/*3*/	{ "itlbmiss", "thresh > .05",
1780	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1781	  itlb_miss, 2 },
1782/*4*/	{ "icachemiss", "thresh > .05",
1783	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1784	  icache_miss_has, 2 },
1785/*5*/	{ "lcpstall", "thresh > .05",
1786	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1787	  lcp_stall, 2 },
1788/*6*/	{ "cache1", "thresh >= .2",
1789	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1790	  cache1ib, 2 },
1791/*7*/	{ "cache2", "thresh >= .2",
1792	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1793	  cache2has, 4 },
1794/*8*/	{ "contested", "thresh >= .05",
1795	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1796	  contested_has, 2 },
1797/*9*/	{ "datashare", "thresh >= .05",
1798	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1799	  datasharing_has, 2 },
1800/*10*/	{ "blockstorefwd", "thresh >= .05",
1801	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1802	  blockstoreforward, 2 },
1803/*11*/	{ "splitload", "thresh >= .1",
1804	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1805	  splitload , 2},
1806/*12*/	{ "splitstore", "thresh >= .01",
1807	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1808	  splitstore, 2 },
1809/*13*/	{ "aliasing_4k", "thresh >= .1",
1810	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1811	  aliasing, 2 },
1812/*14*/	{ "dtlbmissload", "thresh >= .1",
1813	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1814	  dtlb_missload, 3 },
1815/*15*/	{ "br_miss", "thresh >= .2",
1816	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1817	  br_mispredict, 2 },
1818/*16*/	{ "clears", "thresh >= .02",
1819	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1820	  clears, 4 },
1821/*17*/	{ "microassist", "thresh >= .05",
1822	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1823	  microassist, 2 },
1824/*18*/	{ "fpassist", "look for a excessive value",
1825	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1826	  fpassists, 2 },
1827/*19*/	{ "otherassistavx", "look for a excessive value",
1828	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1829	  otherassistavx, 2 },
1830/*20*/	{ "otherassistsse", "look for a excessive value",
1831	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1832	  otherassistsse, 2 },
1833};
1834
1835
/*
 * Print a human readable description of the canned Broadwell
 * expression called "name": first the formula that is evaluated and
 * then the threshold at which the result suggests that performance
 * could be improved.  Names that are not recognised produce an
 * "Unknown name" line and an "unknown entry" threshold.
 *
 * NOTE(review): the thresholds printed here for "datashare"
 * ("thresh > .05") and "microassist" ("thresh >= .05") differ from the
 * ones stored in the broadwell[] table ("thresh >= .05" and
 * "thresh >= .2") -- confirm which values are intended.
 */
static void
explain_name_broad(const char *name)
{
	const char *mythresh;

	if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .75";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "itlbmiss") == 0) {
		/* Event name spelled as in the pmcstat command of the table. */
		printf("Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "br_miss") == 0) {
		/* Event names corrected: ALL_BRANCHES, RETIRED, UOPS_ISSUED. */
		printf("Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
		printf(" (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
		printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf("          MACHINE_CLEARS.SMC + \n");
		printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1904
1905
/*
 * Canned expressions available on Broadwell CPUs.
 *
 * Each initializer supplies, in order: the expression name given to -e,
 * the threshold text shown to the user, the pmcstat command that
 * collects the required events, the function that evaluates the
 * expression and the number of PMC counters the test needs (this is
 * what set_expression() reads as name, thresh, command, func and
 * counters_required -- the member order itself is presumed from the
 * value types; confirm against struct cpu_entry).
 */
#define BROADWELL_COUNT 17
static struct cpu_entry broadwell[BROADWELL_COUNT] = {
/*1*/	{ "eff1", "thresh < .75",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*2*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
/*3*/	{ "itlbmiss", "thresh > .05",
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
	  itlb_miss_broad, 3 },
/*4*/	{ "icachemiss", "thresh > .05",
	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss_has, 2 },
/*5*/	{ "lcpstall", "thresh > .05",
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall, 2 },
/*6*/	{ "cache1", "thresh >= .1",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1broad, 2 },
/*7*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2broad, 2 },
	/* NOTE(review): the command below names three events but only 2 counters are required -- confirm. */
/*8*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
	  contestedbroad, 2 },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing_has, 2 },
/*10*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*11*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing_broad, 2 },
/*12*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload, 3 },
	/* NOTE(review): the command below names six events while 7 counters are required -- confirm. */
/*13*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	  br_mispredict_broad, 7 },
/*14*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears_broad, 5 },
/*15*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*16*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx, 2 },
/*17*/	{ "microassist", "thresh >= .2",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
	  microassist_broad, 4 },
};
1960
1961
1962static void
1963set_sandybridge(void)
1964{
1965	strcpy(the_cpu.cputype, "SandyBridge PMC");
1966	the_cpu.number = SANDY_BRIDGE_COUNT;
1967	the_cpu.ents = sandy_bridge;
1968	the_cpu.explain = explain_name_sb;
1969}
1970
1971static void
1972set_ivybridge(void)
1973{
1974	strcpy(the_cpu.cputype, "IvyBridge PMC");
1975	the_cpu.number = IVY_BRIDGE_COUNT;
1976	the_cpu.ents = ivy_bridge;
1977	the_cpu.explain = explain_name_ib;
1978}
1979
1980
1981static void
1982set_haswell(void)
1983{
1984	strcpy(the_cpu.cputype, "HASWELL PMC");
1985	the_cpu.number = HASWELL_COUNT;
1986	the_cpu.ents = haswell;
1987	the_cpu.explain = explain_name_has;
1988}
1989
1990
1991static void
1992set_broadwell(void)
1993{
1994	strcpy(the_cpu.cputype, "HASWELL PMC");
1995	the_cpu.number = BROADWELL_COUNT;
1996	the_cpu.ents = broadwell;
1997	the_cpu.explain = explain_name_broad;
1998}
1999
2000
2001static int
2002set_expression(const char *name)
2003{
2004	int found = 0, i;
2005	for(i=0 ; i< the_cpu.number; i++) {
2006		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2007			found = 1;
2008			expression = the_cpu.ents[i].func;
2009			command = the_cpu.ents[i].command;
2010			threshold = the_cpu.ents[i].thresh;
2011			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2012				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2013				       the_cpu.ents[i].name,
2014				       the_cpu.ents[i].counters_required, max_pmc_counters);
2015				printf("Sorry this test can not be run\n");
2016				if (run_all == 0) {
2017					exit(-1);
2018				} else {
2019					return(-1);
2020				}
2021			}
2022			break;
2023		}
2024	}
2025	if (!found) {
2026		printf("For CPU type %s we have no expression:%s\n",
2027		       the_cpu.cputype, name);
2028		exit(-1);
2029	}
2030	return(0);
2031}
2032
2033
2034
2035
2036
2037static int
2038validate_expression(char *name)
2039{
2040	int i, found;
2041
2042	found = 0;
2043	for(i=0 ; i< the_cpu.number; i++) {
2044		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2045			found = 1;
2046			break;
2047		}
2048	}
2049	if (!found) {
2050		return(-1);
2051	}
2052	return (0);
2053}
2054
2055static void
2056do_expression(struct counters *cpu, int pos)
2057{
2058	if (expression == NULL)
2059		return;
2060	(*expression)(cpu, pos);
2061}
2062
2063static void
2064process_header(int idx, char *p)
2065{
2066	struct counters *up;
2067	int i, len, nlen;
2068	/*
2069	 * Given header element idx, at p in
2070	 * form 's/NN/nameof'
2071	 * process the entry to pull out the name and
2072	 * the CPU number.
2073	 */
2074	if (strncmp(p, "s/", 2)) {
2075		printf("Check -- invalid header no s/ in %s\n",
2076		       p);
2077		return;
2078	}
2079	up = &cnts[idx];
2080	up->cpu = strtol(&p[2], NULL, 10);
2081	len = strlen(p);
2082	for (i=2; i<len; i++) {
2083		if (p[i] == '/') {
2084			nlen = strlen(&p[(i+1)]);
2085			if (nlen < (MAX_NLEN-1)) {
2086				strcpy(up->counter_name, &p[(i+1)]);
2087			} else {
2088				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2089			}
2090		}
2091	}
2092}
2093
2094static void
2095build_counters_from_header(FILE *io)
2096{
2097	char buffer[8192], *p;
2098	int i, len, cnt;
2099	size_t mlen;
2100
2101	/* We have a new start, lets
2102	 * setup our headers and cpus.
2103	 */
2104	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2105		printf("First line can't be read from file err:%d\n", errno);
2106		return;
2107	}
2108	/*
2109	 * Ok output is an array of counters. Once
2110	 * we start to read the values in we must
2111	 * put them in there slot to match there CPU and
2112	 * counter being updated. We create a mass array
2113	 * of the counters, filling in the CPU and
2114	 * counter name.
2115	 */
2116	/* How many do we get? */
2117	len = strlen(buffer);
2118	for (i=0, cnt=0; i<len; i++) {
2119		if (strncmp(&buffer[i], "s/", 2) == 0) {
2120			cnt++;
2121			for(;i<len;i++) {
2122				if (buffer[i] == ' ')
2123					break;
2124			}
2125		}
2126	}
2127	mlen = sizeof(struct counters) * cnt;
2128	cnts = malloc(mlen);
2129	ncnts = cnt;
2130	if (cnts == NULL) {
2131		printf("No memory err:%d\n", errno);
2132		return;
2133	}
2134	memset(cnts, 0, mlen);
2135	for (i=0, cnt=0; i<len; i++) {
2136		if (strncmp(&buffer[i], "s/", 2) == 0) {
2137			p = &buffer[i];
2138			for(;i<len;i++) {
2139				if (buffer[i] == ' ') {
2140					buffer[i] = 0;
2141					break;
2142				}
2143			}
2144			process_header(cnt, p);
2145			cnt++;
2146		}
2147	}
2148	if (verbose)
2149		printf("We have %d entries\n", cnt);
2150}
/*
 * Number of sample lines after which process_file() stops reading.
 * Starts at MAX_COUNTER_SLOTS; main() lets -m change it but never
 * allows more than MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
2153
2154static int
2155read_a_line(FILE *io)
2156{
2157	char buffer[8192], *p, *stop;
2158	int pos, i;
2159
2160	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2161		return(0);
2162	}
2163	p = buffer;
2164	for (i=0; i<ncnts; i++) {
2165		pos = cnts[i].pos;
2166		cnts[i].vals[pos] = strtol(p, &stop, 0);
2167		cnts[i].pos++;
2168		cnts[i].sum += cnts[i].vals[pos];
2169		p = stop;
2170	}
2171	return (1);
2172}
2173
/*
 * Number of CPUs that have an entry in glob_cpu[]; set by
 * print_header() and used by the output loops to decide when a row of
 * results is complete.
 */
extern int cpu_count_out;
int cpu_count_out=0;
2176
2177static void
2178print_header(void)
2179{
2180	int i, cnt, printed_cnt;
2181
2182	printf("*********************************\n");
2183	for(i=0, cnt=0; i<MAX_CPU; i++) {
2184		if (glob_cpu[i]) {
2185			cnt++;
2186		}
2187	}
2188	cpu_count_out = cnt;
2189	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2190		if (glob_cpu[i]) {
2191			printf("CPU%d", i);
2192			printed_cnt++;
2193		}
2194		if (printed_cnt == cnt) {
2195			printf("\n");
2196			break;
2197		} else {
2198			printf("\t");
2199		}
2200	}
2201}
2202
2203static void
2204lace_cpus_together(void)
2205{
2206	int i, j, lace_cpu;
2207	struct counters *cpat, *at;
2208
2209	for(i=0; i<ncnts; i++) {
2210		cpat = &cnts[i];
2211		if (cpat->next_cpu) {
2212			/* Already laced in */
2213			continue;
2214		}
2215		lace_cpu = cpat->cpu;
2216		if (lace_cpu >= MAX_CPU) {
2217			printf("CPU %d to big\n", lace_cpu);
2218			continue;
2219		}
2220		if (glob_cpu[lace_cpu] == NULL) {
2221			glob_cpu[lace_cpu] = cpat;
2222		} else {
2223			/* Already processed this cpu */
2224			continue;
2225		}
2226		/* Ok look forward for cpu->cpu and link in */
2227		for(j=(i+1); j<ncnts; j++) {
2228			at = &cnts[j];
2229			if (at->next_cpu) {
2230				continue;
2231			}
2232			if (at->cpu == lace_cpu) {
2233				/* Found one */
2234				cpat->next_cpu = at;
2235				cpat = at;
2236			}
2237		}
2238	}
2239}
2240
2241
2242static void
2243process_file(char *filename)
2244{
2245	FILE *io;
2246	int i;
2247	int line_at, not_done;
2248	pid_t pid_of_command=0;
2249
2250	if (filename ==  NULL) {
2251		io = my_popen(command, "r", &pid_of_command);
2252	} else {
2253		io = fopen(filename, "r");
2254		if (io == NULL) {
2255			printf("Can't process file %s err:%d\n",
2256			       filename, errno);
2257			return;
2258		}
2259	}
2260	build_counters_from_header(io);
2261	if (cnts == NULL) {
2262		/* Nothing we can do */
2263		printf("Nothing to do -- no counters built\n");
2264		if (io) {
2265			fclose(io);
2266		}
2267		return;
2268	}
2269	lace_cpus_together();
2270	print_header();
2271	if (verbose) {
2272		for (i=0; i<ncnts; i++) {
2273			printf("Counter:%s cpu:%d index:%d\n",
2274			       cnts[i].counter_name,
2275			       cnts[i].cpu, i);
2276		}
2277	}
2278	line_at = 0;
2279	not_done = 1;
2280	while(not_done) {
2281		if (read_a_line(io)) {
2282			line_at++;
2283		} else {
2284			break;
2285		}
2286		if (line_at >= max_to_collect) {
2287			not_done = 0;
2288		}
2289		if (filename == NULL) {
2290			int cnt;
2291			/* For the ones we dynamically open we print now */
2292			for(i=0, cnt=0; i<MAX_CPU; i++) {
2293				do_expression(glob_cpu[i], (line_at-1));
2294				cnt++;
2295				if (cnt == cpu_count_out) {
2296					printf("\n");
2297					break;
2298				} else {
2299					printf("\t");
2300				}
2301			}
2302		}
2303	}
2304	if (filename) {
2305		fclose(io);
2306	} else {
2307		my_pclose(io, pid_of_command);
2308	}
2309}
/*
 * CPUID access helpers.
 *
 * On amd64, cpuid(in,a,b,c,d) executes CPUID with EAX = in and stores
 * EAX/EBX/ECX/EDX in a/b/c/d; the expansion already ends in a
 * semicolon.  do_cpuid(ax, cx, p) executes CPUID with EAX = ax and
 * ECX = cx and stores EAX/EBX/ECX/EDX in p[0]..p[3].
 *
 * On every other architecture both expand to nothing, so the output
 * variables keep whatever value they had before.
 */
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));

static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax), "c" (cx) );
}

#else
#define cpuid(in, a, b, c, d)
#define do_cpuid(ax, cx, p)
#endif
2326
2327static void
2328get_cpuid_set(void)
2329{
2330	unsigned long eax, ebx, ecx, edx;
2331	int model;
2332	pid_t pid_of_command=0;
2333	size_t sz, len;
2334	FILE *io;
2335	char linebuf[1024], *str;
2336	u_int reg[4];
2337
2338	eax = ebx = ecx = edx = 0;
2339
2340	cpuid(0, eax, ebx, ecx, edx);
2341	if (ebx == 0x68747541) {
2342		printf("AMD processors are not supported by this program\n");
2343		printf("Sorry\n");
2344		exit(0);
2345	} else if (ebx == 0x6972794) {
2346		printf("Cyrix processors are not supported by this program\n");
2347		printf("Sorry\n");
2348		exit(0);
2349	} else if (ebx == 0x756e6547) {
2350		printf("Genuine Intel\n");
2351	} else {
2352		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2353		exit(0);
2354	}
2355	cpuid(1, eax, ebx, ecx, edx);
2356	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2357	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2358	switch (eax & 0xF00) {
2359	case 0x500:		/* Pentium family processors */
2360		printf("Intel Pentium P5\n");
2361		goto not_supported;
2362		break;
2363	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2364		switch (model) {
2365		case 0x1:
2366			printf("Intel Pentium P6\n");
2367			goto not_supported;
2368			break;
2369		case 0x3:
2370		case 0x5:
2371			printf("Intel PII\n");
2372			goto not_supported;
2373			break;
2374		case 0x6: case 0x16:
2375			printf("Intel CL\n");
2376			goto not_supported;
2377			break;
2378		case 0x7: case 0x8: case 0xA: case 0xB:
2379			printf("Intel PIII\n");
2380			goto not_supported;
2381			break;
2382		case 0x9: case 0xD:
2383			printf("Intel PM\n");
2384			goto not_supported;
2385			break;
2386		case 0xE:
2387			printf("Intel CORE\n");
2388			goto not_supported;
2389			break;
2390		case 0xF:
2391			printf("Intel CORE2\n");
2392			goto not_supported;
2393			break;
2394		case 0x17:
2395			printf("Intel CORE2EXTREME\n");
2396			goto not_supported;
2397			break;
2398		case 0x1C:	/* Per Intel document 320047-002. */
2399			printf("Intel ATOM\n");
2400			goto not_supported;
2401			break;
2402		case 0x1A:
2403		case 0x1E:	/*
2404				 * Per Intel document 253669-032 9/2009,
2405				 * pages A-2 and A-57
2406				 */
2407		case 0x1F:	/*
2408				 * Per Intel document 253669-032 9/2009,
2409				 * pages A-2 and A-57
2410				 */
2411			printf("Intel COREI7\n");
2412			goto not_supported;
2413			break;
2414		case 0x2E:
2415			printf("Intel NEHALEM\n");
2416			goto not_supported;
2417			break;
2418		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2419		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2420			printf("Intel WESTMERE\n");
2421			goto not_supported;
2422			break;
2423		case 0x2F:	/* Westmere-EX, seen in wild */
2424			printf("Intel WESTMERE\n");
2425			goto not_supported;
2426			break;
2427		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2428			printf("Intel SANDYBRIDGE\n");
2429			set_sandybridge();
2430			break;
2431		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2432			printf("Intel SANDYBRIDGE_XEON\n");
2433			set_sandybridge();
2434			break;
2435		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2436			printf("Intel IVYBRIDGE\n");
2437			set_ivybridge();
2438			break;
2439		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2440			printf("Intel IVYBRIDGE_XEON\n");
2441			set_ivybridge();
2442			break;
2443		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2444			printf("Intel HASWELL (Xeon)\n");
2445			set_haswell();
2446			break;
2447		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2448		case 0x45:
2449		case 0x46:
2450			printf("Intel HASWELL\n");
2451			set_haswell();
2452			break;
2453
2454		case 0x4e:
2455		case 0x5e:
2456			printf("Intel SKY-LAKE\n");
2457			goto not_supported;
2458			break;
2459		case 0x3D:
2460		case 0x47:
2461			printf("Intel BROADWELL\n");
2462			set_broadwell();
2463			break;
2464		case 0x4f:
2465		case 0x56:
2466			printf("Intel BROADWEL (Xeon)\n");
2467			set_broadwell();
2468			break;
2469
2470		case 0x4D:
2471			/* Per Intel document 330061-001 01/2014. */
2472			printf("Intel ATOM_SILVERMONT\n");
2473			goto not_supported;
2474			break;
2475		default:
2476			printf("Intel model 0x%x is not known -- sorry\n",
2477			       model);
2478			goto not_supported;
2479			break;
2480		}
2481		break;
2482	case 0xF00:		/* P4 */
2483		printf("Intel unknown model %d\n", model);
2484		goto not_supported;
2485		break;
2486	}
2487	do_cpuid(0xa, 0, reg);
2488	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2489	printf("We have %d PMC counters to work with\n", max_pmc_counters);
2490	/* Ok lets load the list of all known PMC's */
2491	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2492	if (valid_pmcs == NULL) {
2493		/* Likely */
2494		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2495		sz = sizeof(char *) * pmc_allocated_cnt;
2496		valid_pmcs = malloc(sz);
2497		if (valid_pmcs == NULL) {
2498			printf("No memory allocation fails at startup?\n");
2499			exit(-1);
2500		}
2501		memset(valid_pmcs, 0, sz);
2502	}
2503
2504	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2505		if (linebuf[0] != '\t') {
2506			/* sometimes headers ;-) */
2507			continue;
2508		}
2509		len = strlen(linebuf);
2510		if (linebuf[(len-1)] == '\n') {
2511			/* Likely */
2512			linebuf[(len-1)] = 0;
2513		}
2514		str = &linebuf[1];
2515		len = strlen(str) + 1;
2516		valid_pmcs[valid_pmc_cnt] = malloc(len);
2517		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2518			printf("No memory2 allocation fails at startup?\n");
2519			exit(-1);
2520		}
2521		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2522		strcpy(valid_pmcs[valid_pmc_cnt], str);
2523		valid_pmc_cnt++;
2524		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2525			/* Got to expand -- unlikely */
2526			char **more;
2527
2528			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2529			more = malloc(sz);
2530			if (more == NULL) {
2531				printf("No memory3 allocation fails at startup?\n");
2532				exit(-1);
2533			}
2534			memset(more, 0, sz);
2535			memcpy(more, valid_pmcs, sz);
2536			pmc_allocated_cnt *= 2;
2537			free(valid_pmcs);
2538			valid_pmcs = more;
2539		}
2540	}
2541	my_pclose(io, pid_of_command);
2542	return;
2543not_supported:
2544	printf("Not supported\n");
2545	exit(-1);
2546}
2547
2548static void
2549explain_all(void)
2550{
2551	int i;
2552	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2553	printf("-------------------------------------------------------------\n");
2554	for(i=0; i<the_cpu.number; i++){
2555		printf("For -e %s ", the_cpu.ents[i].name);
2556		(*the_cpu.explain)(the_cpu.ents[i].name);
2557		printf("----------------------------\n");
2558	}
2559}
2560
/*
 * Try one PMC by running "/usr/sbin/pmcstat -w .25 -c 0 -s <pmc>" and
 * print the verdict next to the name the caller has already printed.
 *
 * out_so_far is the number of characters printed so far on the line;
 * the verdict is padded out to column 50.  The first output line must
 * contain the PMC name (and no "ERROR"); the second line, stripped of
 * leading blanks, is printed after "Pass".
 *
 * Fixes over the earlier version: the command is built with a bounded
 * snprintf(), leading blanks of the value line are skipped one at a
 * time (the old loop stepped twice per blank, which could skip real
 * characters or run past the end of the text), and a name sitting at
 * the very end of the header line is now found as well.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, j, k;

	for(i=out_so_far; i<50; i++) {
		printf(" ");
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect: the PMC name somewhere in the header */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<=(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/* Skip the blanks in front of the value. */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			for(k=len; k<20; k++) {
				printf(" ");
			}
			if (len) {
				/* The line still carries its own newline. */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2633
/*
 * Add a private copy of "name" to the string list "vars", which
 * currently holds cur_cnt entries, unless an equal string is already
 * present.
 *
 * Returns 1 when the name was stored in vars[cur_cnt] and 0 when it
 * was already in the list.  The program exits if that slot is not
 * empty or if no memory is available.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **slot, *copy;
	size_t nbytes;
	int k;

	k = 0;
	while (k < cur_cnt) {
		if (strcmp(name, vars[k]) == 0) {
			/* Already have */
			return (0);
		}
		k++;
	}

	slot = &vars[cur_cnt];
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, *slot);
		exit(-1);
	}

	/* Ok its new, keep our own copy including the terminator */
	nbytes = strlen(name) + 1;
	copy = malloc(nbytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, nbytes);
	*slot = copy;
	return (1);
}
2661
2662static char *
2663build_command_for_exp(struct expression *exp)
2664{
2665	/*
2666	 * Build the pmcstat command to handle
2667	 * the passed in expression.
2668	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2669	 * where NNN and QQQ represent the PMC's in the expression
2670	 * uniquely..
2671	 */
2672	char forming[1024];
2673	int cnt_pmc, alloced_pmcs, i;
2674	struct expression *at;
2675	char **vars, *cmd;
2676	size_t mal;
2677
2678	alloced_pmcs = cnt_pmc = 0;
2679	/* first how many do we have */
2680	at = exp;
2681	while (at) {
2682		if (at->type == TYPE_VALUE_PMC) {
2683			cnt_pmc++;
2684		}
2685		at = at->next;
2686	}
2687	if (cnt_pmc == 0) {
2688		printf("No PMC's in your expression -- nothing to do!!\n");
2689		exit(0);
2690	}
2691	mal = cnt_pmc * sizeof(char *);
2692	vars = malloc(mal);
2693	if (vars == NULL) {
2694		printf("No memory\n");
2695		exit(-1);
2696	}
2697	memset(vars, 0, mal);
2698	at = exp;
2699	while (at) {
2700		if (at->type == TYPE_VALUE_PMC) {
2701			if(add_it_to(vars, alloced_pmcs, at->name)) {
2702				alloced_pmcs++;
2703			}
2704		}
2705		at = at->next;
2706	}
2707	/* Now we have a unique list in vars so create our command */
2708	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2709	for(i=0; i<alloced_pmcs; i++) {
2710		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2711	}
2712	cmd = malloc((mal+2));
2713	if (cmd == NULL) {
2714		printf("%s out of mem\n", __FUNCTION__);
2715		exit(-1);
2716	}
2717	memset(cmd, 0, (mal+2));
2718	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2719	at = exp;
2720	for(i=0; i<alloced_pmcs; i++) {
2721		sprintf(forming, " -s %s", vars[i]);
2722		strcat(cmd, forming);
2723		free(vars[i]);
2724		vars[i] = NULL;
2725	}
2726	free(vars);
2727	return(cmd);
2728}
2729
2730static int
2731user_expr(struct counters *cpu, int pos)
2732{
2733	int ret;
2734	double res;
2735	struct counters *var;
2736	struct expression *at;
2737
2738	at = master_exp;
2739	while (at) {
2740		if (at->type == TYPE_VALUE_PMC) {
2741			var = find_counter(cpu, at->name);
2742			if (var == NULL) {
2743				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2744				exit(-1);
2745			}
2746			if (pos != -1) {
2747				at->value = var->vals[pos] * 1.0;
2748			} else {
2749				at->value = var->sum * 1.0;
2750			}
2751		}
2752		at = at->next;
2753	}
2754	res = run_expr(master_exp, 1, NULL);
2755	ret = printf("%1.3f", res);
2756	return(ret);
2757}
2758
2759
2760static void
2761set_manual_exp(struct expression *exp)
2762{
2763	expression = user_expr;
2764	command = build_command_for_exp(exp);
2765	threshold = "User defined threshold";
2766}
2767
2768static void
2769run_tests(void)
2770{
2771	int i, lenout;
2772	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2773	printf("------------------------------------------------------------------------\n");
2774	for(i=0; i<valid_pmc_cnt; i++) {
2775		lenout = printf("%s", valid_pmcs[i]);
2776		fflush(stdout);
2777		test_for_a_pmc(valid_pmcs[i], lenout);
2778	}
2779}
2780static void
2781list_all(void)
2782{
2783	int i, cnt, j;
2784	printf("PMC                                               Abbreviation\n");
2785	printf("--------------------------------------------------------------\n");
2786	for(i=0; i<valid_pmc_cnt; i++) {
2787		cnt = printf("%s", valid_pmcs[i]);
2788		for(j=cnt; j<52; j++) {
2789			printf(" ");
2790		}
2791		printf("%%%d\n", i);
2792	}
2793}
2794
2795
/*
 * Program entry point.
 *
 * Detects the CPU (get_cpuid_set() exits on unsupported processors),
 * parses the options and then either lists/explains/tests PMC's or
 * runs one expression -- a canned one (-e), a user supplied one (-E)
 * or, with -A, every canned expression of the CPU in turn.  Samples
 * come from the generated pmcstat command or from the file given with
 * -i.  Every path visible here returns 0.
 */
int
main(int argc, char **argv)
{
	int i, j, cnt;
	char *filename=NULL;
	const char *name=NULL;
	int help_only = 0;
	int test_mode = 0;
	int test_at = 0;	/* next table entry to run with -A */

	get_cpuid_set();
	memset(glob_cpu, 0, sizeof(glob_cpu));
	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
		switch (i) {
		case 'A':
			run_all = 1;
			break;
		case 'L':
			/* List the known PMC's and stop */
			list_all();
			return(0);
		case 'H':
			printf("**********************************\n");
			explain_all();
			printf("**********************************\n");
			return(0);
			break;
		case 'T':
			test_mode = 1;
			break;
		case 'E':
			/* User supplied expression; nothing is set up when it does not parse */
			master_exp = parse_expression(optarg);
			if (master_exp) {
				set_manual_exp(master_exp);
			}
			break;
		case 'e':
			if (validate_expression(optarg)) {
				printf("Unknown expression %s\n", optarg);
				return(0);
			}
			name = optarg;
			set_expression(optarg);
			break;
		case 'm':
			max_to_collect = strtol(optarg, NULL, 0);
			if (max_to_collect > MAX_COUNTER_SLOTS) {
				/* You can't collect more than max in array */
				max_to_collect = MAX_COUNTER_SLOTS;
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'h':
			help_only = 1;
			break;
		case 'i':
			filename = optarg;
			break;
		case '?':
		default:
		/* Also reached by the goto below, from outside the option loop */
		use:
			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
			       argv[0]);
			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
			printf("-v -- verbose dump debug type things -- you don't want this\n");
			printf("-m N -- maximum to collect is N measurments\n");
			printf("-e expr-name -- Do expression expr-name\n");
			printf("-E 'your expression' -- Do your expression\n");
			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
			printf("-H -- Don't run anything, just explain all canned expressions\n");
			printf("-T -- Test all PMC's defined by this processor\n");
			printf("-A -- Run all canned tests\n");
			return(0);
			break;
		}
	}
	/* Something has to tell us what to run or what to read */
	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
	    (test_mode == 0) && (master_exp == NULL)) {
		printf("Without setting an expression we cannot dynamically gather information\n");
		printf("you must supply a filename (and you probably want verbosity)\n");
		goto use;
	}
	/* When running every test only a few samples are taken per test */
	if (run_all && max_to_collect > 10) {
		max_to_collect = 3;
	}
	if (test_mode) {
		run_tests();
		return(0);
	}
	printf("*********************************\n");
	if ((master_exp == NULL) && name) {
		(*the_cpu.explain)(name);
	} else if (master_exp) {
		printf("Examine your expression ");
		print_exp(master_exp);
		printf("User defined threshold\n");
	}
	if (help_only) {
		return(0);
	}
	if (run_all) {
	/* Start of one run-all round: select table entry test_at */
	more:
		name = the_cpu.ents[test_at].name;
		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
		test_at++;
		if (set_expression(name) == -1) {
			/* Not runnable on this CPU: move on, or finish after the last entry */
			if (test_at >= the_cpu.number) {
				goto done;
			} else
				goto more;
		}

	}
	process_file(filename);
	if (verbose >= 2) {
		/* Dump every collected value */
		for (i=0; i<ncnts; i++) {
			printf("Counter:%s cpu:%d index:%d\n",
			       cnts[i].counter_name,
			       cnts[i].cpu, i);
			for(j=0; j<cnts[i].pos; j++) {
				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
			}
			printf(" sum - %ld\n", (long int)cnts[i].sum);
		}
	}
	if (expression == NULL) {
		return(0);
	}
	if (max_to_collect > 1) {
		/* Summary row: the expression over the sums (pos -1), one column per CPU */
		for(i=0, cnt=0; i<MAX_CPU; i++) {
			if (glob_cpu[i]) {
				do_expression(glob_cpu[i], -1);
				cnt++;
				if (cnt == cpu_count_out) {
					printf("\n");
					break;
				} else {
					printf("\t");
				}
			}
		}
	}
	if (run_all && (test_at < the_cpu.number)) {
		/* Forget the per CPU chains of this round and run the next test */
		memset(glob_cpu, 0, sizeof(glob_cpu));
		ncnts = 0;
		printf("*********************************\n");
		goto more;
	} else if (run_all) {
	done:
		printf("*********************************\n");
	}
	return(0);
}
2950