1/*-
2 * Copyright (c) 2014-2015 Netflix, Inc.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer,
9 *    in this position and unchanged.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 *    derived from this software without specific prior written permission
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27#include <sys/types.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <unistd.h>
31#include <string.h>
32#include <strings.h>
33#include <sys/errno.h>
34#include <signal.h>
35#include <sys/wait.h>
36#include <getopt.h>
37#include "eval_expr.h"
/*
 * File-private settings.  Their consumers are outside this part of the
 * file; only the initial values are established here.
 */
static int max_pmc_counters = 1;
static int run_all = 0;
static int verbose = 0;

/* Fixed capacities used by the declarations that follow. */
#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64
#define PMC_INITIAL_ALLOC 512

/* Process environment. */
extern char **environ;

/*
 * Each global below is declared extern right before it is defined
 * (presumably so that missing-declaration warnings stay quiet).
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
57
/*
 * The following two functions are variants of popen and pclose, with
 * the caveat that they give you the PID so that you
 * can supply it to pclose, which can then send a SIGTERM
 * to the process.
 */
/*
 * my_popen --
 *	Run `command' via "/bin/sh -c" in a child process whose stdin is fed
 *	from one pipe and whose stdout and stderr both go to a second pipe.
 *
 *	dir selects which pipe end is handed back: "r" returns a stream
 *	reading the child's stdout/stderr, "w" returns a stream writing to
 *	the child's stdin.  All other pipe ends are closed in the parent.
 *
 *	On success the stream is returned and *p_pid holds the child's PID.
 *	On failure NULL is returned with errno set: EINVAL for a bad dir,
 *	E2BIG when command does not fit the local buffer, otherwise the
 *	error from pipe(), fork() or fdopen().  *p_pid is written only after
 *	fork() succeeded; if fdopen() then fails the child already exists
 *	and the caller still owns the job of reaping it.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int reading, keep_fd, saved_errno;
	char *argv[4];
	pid_t pid;
	char cmd[4];
	char cmd2[1024];
	char arg1[4];

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	/* Refuse a command that would overflow cmd2 instead of copying it. */
	if (strlen(command) >= sizeof(cmd2)) {
		errno = E2BIG;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		/* Keep pipe()'s errno; close() may overwrite it. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}
	/* exec wants writable strings, so build the argv from local copies. */
	strcpy(cmd, "sh");
	strcpy(arg1, "-c");
	strcpy(cmd2, command);
	argv[0] = cmd;
	argv[1] = arg1;
	argv[2] = cmd2;
	argv[3] = NULL;

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Wire up stdin, stdout and stderr; give up if that fails. */
		if (dup2(pdesin[0], STDIN_FILENO) == -1 ||
		    dup2(pdesout[1], STDOUT_FILENO) == -1 ||
		    dup2(pdesout[1], STDERR_FILENO) == -1)
			_exit(127);
		/* Drop the originals unless they already are the targets. */
		if (pdesin[0] != STDIN_FILENO)
			(void)close(pdesin[0]);
		if (pdesout[1] != STDOUT_FILENO && pdesout[1] != STDERR_FILENO)
			(void)close(pdesout[1]);
		/* Now run it; execv() passes the current environment along. */
		execv("/bin/sh", argv);
		/*
		 * _exit() rather than exit(): the child must not flush stdio
		 * buffers it inherited from the parent.
		 */
		_exit(127);
		/* NOTREACHED */
	default:			/* Parent. */
		break;
	}
	/* Store the pid */
	*p_pid = pid;
	keep_fd = reading ? pdesout[0] : pdesin[1];
	io = fdopen(keep_fd, reading ? "r" : "w");
	saved_errno = errno;
	/* The parent never uses these three ends. */
	(void)close(pdesin[0]);
	(void)close(pdesout[1]);
	(void)close(reading ? pdesin[1] : pdesout[0]);
	if (io == NULL) {
		/* No stream took ownership of the descriptor; release it. */
		(void)close(keep_fd);
		errno = saved_errno;
	}
	return (io);
}
142
/*
 * my_pclose --
 *	Close a stream obtained from my_popen(), send SIGTERM to the child
 *	identified by the_pid, and wait for that child, retrying the wait
 *	when it is interrupted by a signal.
 *
 *	Nothing is returned; failures of fclose(), kill() and waitpid() are
 *	deliberately ignored (best-effort cleanup).  A NULL stream is
 *	skipped, and a non-positive the_pid is never signalled or waited
 *	for, because kill() and waitpid() treat such values as "a process
 *	group" or "any process" rather than as one child.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int pstat;
	pid_t pid;

	if (io != NULL)
		(void)fclose(io);
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	do {
		pid = waitpid(the_pid, &pstat, 0);
	} while (pid == -1 && errno == EINTR);
}
164
165struct counters {
166	struct counters *next_cpu;
167	char counter_name[MAX_NLEN];		/* Name of counter */
168	int cpu;				/* CPU we are on */
169	int pos;				/* Index we are filling to. */
170	uint64_t vals[MAX_COUNTER_SLOTS];	/* Last 64 entries */
171	uint64_t sum;				/* Summary of entries */
172};
173
174extern struct counters *glob_cpu[MAX_CPU];
175struct counters *glob_cpu[MAX_CPU];
176
177extern struct counters *cnts;
178struct counters *cnts=NULL;
179
180extern int ncnts;
181int ncnts=0;
182
183extern int (*expression)(struct counters *, int);
184int (*expression)(struct counters *, int);
185
186static const char *threshold=NULL;
187static const char *command;
188
/*
 * Description of one named metric.  Field order is kept as-is so that any
 * positional initializers elsewhere in the file keep their meaning.
 */
struct cpu_entry {
	/* Metric name. */
	const char *name;
	/* Threshold text for the metric. */
	const char *thresh;
	/* Command string associated with the metric. */
	const char *command;
	/* Routine taking a counter chain and a slot index (or -1). */
	int (*func)(struct counters *, int);
	/* Number of counters the metric needs. */
	int counters_required;
};
196
/*
 * A CPU model: its type string, a table of metric entries with the number
 * of entries, and the routine that explains a metric by name.
 */
struct cpu_type {
	/* CPU type string, at most 32 bytes including the terminator. */
	char cputype[32];
	/* Count that accompanies ents (presumably its length). */
	int number;
	/* Metric table for this CPU type. */
	struct cpu_entry *ents;
	/* Prints the explanation for the named metric. */
	void (*explain)(const char *name);
};

/* The CPU type in effect; zero-initialized until something fills it in. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205
/*
 * explain_name_sb --
 *	Print to stdout the formula behind the metric called `name' for the
 *	"sb" CPU table (presumably Sandy Bridge), followed by one line
 *	stating the threshold at which the value becomes interesting.
 *	An unrecognized name prints "Unknown name:<name>" and the closing
 *	line with "unknown entry" as the threshold.
 */
static void
explain_name_sb(const char *name)
{
	/* Metric name -> formula text (may span lines) -> threshold text. */
	static const struct {
		const char *name;
		const char *formula;
		const char *thresh;
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this formula. */
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(notes) / sizeof(notes[0]); i++) {
		if (strcmp(name, notes[i].name) == 0) {
			/* The text holds no conversions; emit it verbatim. */
			fputs(notes[i].formula, stdout);
			mythresh = notes[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284
/*
 * explain_name_ib --
 *	Print to stdout the formula behind the metric called `name' for the
 *	"ib" CPU table (presumably Ivy Bridge), followed by one line stating
 *	the threshold at which the value becomes interesting.
 *	An unrecognized name prints "Unknown name:<name>" and the closing
 *	line with "unknown entry" as the threshold.
 */
static void
explain_name_ib(const char *name)
{
	/* Metric name -> formula text (may span lines) -> threshold text. */
	static const struct {
		const char *name;
		const char *formula;
		const char *thresh;
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/*
		 * This text had an unbalanced parenthesis and left out the
		 * factor of 29 that cache2ib() documents and uses.
		 */
		{ "cache2",
		  "Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(notes) / sizeof(notes[0]); i++) {
		if (strcmp(name, notes[i].name) == 0) {
			/* The text holds no conversions; emit it verbatim. */
			fputs(notes[i].formula, stdout);
			mythresh = notes[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366
367
/*
 * explain_name_has --
 *	Print to stdout the formula behind the metric called `name' for the
 *	"has" CPU table (presumably Haswell), followed by one line stating
 *	the threshold at which the value becomes interesting.
 *	An unrecognized name prints "Unknown name:<name>" and the closing
 *	line with "unknown entry" as the threshold.
 */
static void
explain_name_has(const char *name)
{
	/* Metric name -> formula text (may span lines) -> threshold text. */
	static const struct {
		const char *name;
		const char *formula;
		const char *thresh;
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(notes) / sizeof(notes[0]); i++) {
		if (strcmp(name, notes[i].name) == 0) {
			/* The text holds no conversions; emit it verbatim. */
			fputs(notes[i].formula, stdout);
			mythresh = notes[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445
446
447
448static struct counters *
449find_counter(struct counters *base, const char *name)
450{
451	struct counters *at;
452	int len;
453
454	at = base;
455	len = strlen(name);
456	while(at) {
457		if (strncmp(at->counter_name, name, len) == 0) {
458			return(at);
459		}
460		at = at->next_cpu;
461	}
462	printf("Can't find counter %s\n", name);
463	printf("We have:\n");
464	at = base;
465	while(at) {
466		printf("- %s\n", at->counter_name);
467		at = at->next_cpu;
468	}
469	exit(-1);
470}
471
472static int
473allocstall1(struct counters *cpu, int pos)
474{
475/*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
476	int ret;
477	struct counters *partial;
478	struct counters *unhalt;
479	double un, par, res;
480	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
481	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
482	if (pos != -1) {
483		par = partial->vals[pos] * 1.0;
484		un = unhalt->vals[pos] * 1.0;
485	} else {
486		par = partial->sum * 1.0;
487		un = unhalt->sum * 1.0;
488	}
489	res = par/un;
490	ret = printf("%1.3f", res);
491	return(ret);
492}
493
494static int
495allocstall2(struct counters *cpu, int pos)
496{
497/*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
498	int ret;
499	struct counters *partial;
500	struct counters *unhalt;
501	double un, par, res;
502	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
503	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
504	if (pos != -1) {
505		par = partial->vals[pos] * 1.0;
506		un = unhalt->vals[pos] * 1.0;
507	} else {
508		par = partial->sum * 1.0;
509		un = unhalt->sum * 1.0;
510	}
511	res = par/un;
512	ret = printf("%1.3f", res);
513	return(ret);
514}
515
516static int
517br_mispredict(struct counters *cpu, int pos)
518{
519	struct counters *brctr;
520	struct counters *unhalt;
521	int ret;
522/*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
523	double br, un, con, res;
524	con = 20.0;
525
526	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
527        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
528	if (pos != -1) {
529		br = brctr->vals[pos] * 1.0;
530		un = unhalt->vals[pos] * 1.0;
531	} else {
532		br = brctr->sum * 1.0;
533		un = unhalt->sum * 1.0;
534	}
535	res = (con * br)/un;
536 	ret = printf("%1.3f", res);
537	return(ret);
538}
539
540static int
541br_mispredictib(struct counters *cpu, int pos)
542{
543	struct counters *brctr;
544	struct counters *unhalt;
545	struct counters *clear, *clear2, *clear3;
546	struct counters *uops;
547	struct counters *recv;
548	struct counters *iss;
549/*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
550	int ret;
551        /*
552	 * (BR_MISP_RETIRED.ALL_BRANCHES /
553	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
554	 *          MACHINE_CLEAR.COUNT) *
555	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
556	 *
557	 */
558	double br, cl, cl2, cl3, uo, re, un, con, res, is;
559	con = 4.0;
560
561	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
562        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
563	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
564	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
565	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
566	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
567	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
568	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
569	if (pos != -1) {
570		br = brctr->vals[pos] * 1.0;
571		cl = clear->vals[pos] * 1.0;
572		cl2 = clear2->vals[pos] * 1.0;
573		cl3 = clear3->vals[pos] * 1.0;
574		uo = uops->vals[pos] * 1.0;
575		re = recv->vals[pos] * 1.0;
576		is = iss->vals[pos] * 1.0;
577		un = unhalt->vals[pos] * 1.0;
578	} else {
579		br = brctr->sum * 1.0;
580		cl = clear->sum * 1.0;
581		cl2 = clear2->sum * 1.0;
582		cl3 = clear3->sum * 1.0;
583		uo = uops->sum * 1.0;
584		re = recv->sum * 1.0;
585		is = iss->sum * 1.0;
586		un = unhalt->sum * 1.0;
587	}
588	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
589 	ret = printf("%1.3f", res);
590	return(ret);
591}
592
593
594static int
595br_mispredict_broad(struct counters *cpu, int pos)
596{
597	struct counters *brctr;
598	struct counters *unhalt;
599	struct counters *clear;
600	struct counters *uops;
601	struct counters *uops_ret;
602	struct counters *recv;
603	int ret;
604	double br, cl, uo, uo_r, re, con, un, res;
605
606	con = 4.0;
607
608	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
609        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
610	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
611	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
612	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
613	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
614
615	if (pos != -1) {
616		un = unhalt->vals[pos] * 1.0;
617		br = brctr->vals[pos] * 1.0;
618		cl = clear->vals[pos] * 1.0;
619		uo = uops->vals[pos] * 1.0;
620		uo_r = uops_ret->vals[pos] * 1.0;
621		re = recv->vals[pos] * 1.0;
622	} else {
623		un = unhalt->sum * 1.0;
624		br = brctr->sum * 1.0;
625		cl = clear->sum * 1.0;
626		uo = uops->sum * 1.0;
627		uo_r = uops_ret->sum * 1.0;
628		re = recv->sum * 1.0;
629	}
630	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
631 	ret = printf("%1.3f", res);
632	return(ret);
633}
634
635static int
636splitloadib(struct counters *cpu, int pos)
637{
638	int ret;
639	struct counters *mem;
640	struct counters *l1d, *ldblock;
641	struct counters *unhalt;
642	double un, memd, res, l1, ldb;
643        /*
644	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
645	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
646	 */
647
648	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
649	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
650	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
651	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
652	if (pos != -1) {
653		memd = mem->vals[pos] * 1.0;
654		l1 = l1d->vals[pos] * 1.0;
655		ldb = ldblock->vals[pos] * 1.0;
656		un = unhalt->vals[pos] * 1.0;
657	} else {
658		memd = mem->sum * 1.0;
659		l1 = l1d->sum * 1.0;
660		ldb = ldblock->sum * 1.0;
661		un = unhalt->sum * 1.0;
662	}
663	res = ((l1 / memd) * ldb)/un;
664	ret = printf("%1.3f", res);
665	return(ret);
666}
667
668
669static int
670splitload(struct counters *cpu, int pos)
671{
672	int ret;
673	struct counters *mem;
674	struct counters *unhalt;
675	double con, un, memd, res;
676/*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
677
678	con = 5.0;
679	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
680	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
681	if (pos != -1) {
682		memd = mem->vals[pos] * 1.0;
683		un = unhalt->vals[pos] * 1.0;
684	} else {
685		memd = mem->sum * 1.0;
686		un = unhalt->sum * 1.0;
687	}
688	res = (memd * con)/un;
689	ret = printf("%1.3f", res);
690	return(ret);
691}
692
693
694static int
695splitload_sb(struct counters *cpu, int pos)
696{
697	int ret;
698	struct counters *mem;
699	struct counters *unhalt;
700	double con, un, memd, res;
701/*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
702
703	con = 5.0;
704	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
705	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
706	if (pos != -1) {
707		memd = mem->vals[pos] * 1.0;
708		un = unhalt->vals[pos] * 1.0;
709	} else {
710		memd = mem->sum * 1.0;
711		un = unhalt->sum * 1.0;
712	}
713	res = (memd * con)/un;
714	ret = printf("%1.3f", res);
715	return(ret);
716}
717
718
719static int
720splitstore_sb(struct counters *cpu, int pos)
721{
722        /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
723	int ret;
724	struct counters *mem_split;
725	struct counters *mem_stores;
726	double memsplit, memstore, res;
727	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
728	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
729	if (pos != -1) {
730		memsplit = mem_split->vals[pos] * 1.0;
731		memstore = mem_stores->vals[pos] * 1.0;
732	} else {
733		memsplit = mem_split->sum * 1.0;
734		memstore = mem_stores->sum * 1.0;
735	}
736	res = memsplit/memstore;
737	ret = printf("%1.3f", res);
738	return(ret);
739}
740
741
742
743static int
744splitstore(struct counters *cpu, int pos)
745{
746        /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
747	int ret;
748	struct counters *mem_split;
749	struct counters *mem_stores;
750	double memsplit, memstore, res;
751	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
752	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
753	if (pos != -1) {
754		memsplit = mem_split->vals[pos] * 1.0;
755		memstore = mem_stores->vals[pos] * 1.0;
756	} else {
757		memsplit = mem_split->sum * 1.0;
758		memstore = mem_stores->sum * 1.0;
759	}
760	res = memsplit/memstore;
761	ret = printf("%1.3f", res);
762	return(ret);
763}
764
765
766static int
767contested(struct counters *cpu, int pos)
768{
769        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
770	int ret;
771	struct counters *mem;
772	struct counters *unhalt;
773	double con, un, memd, res;
774
775	con = 60.0;
776	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
777	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
778	if (pos != -1) {
779		memd = mem->vals[pos] * 1.0;
780		un = unhalt->vals[pos] * 1.0;
781	} else {
782		memd = mem->sum * 1.0;
783		un = unhalt->sum * 1.0;
784	}
785	res = (memd * con)/un;
786	ret = printf("%1.3f", res);
787	return(ret);
788}
789
790static int
791contested_has(struct counters *cpu, int pos)
792{
793        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
794	int ret;
795	struct counters *mem;
796	struct counters *unhalt;
797	double con, un, memd, res;
798
799	con = 84.0;
800	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
801	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
802	if (pos != -1) {
803		memd = mem->vals[pos] * 1.0;
804		un = unhalt->vals[pos] * 1.0;
805	} else {
806		memd = mem->sum * 1.0;
807		un = unhalt->sum * 1.0;
808	}
809	res = (memd * con)/un;
810	ret = printf("%1.3f", res);
811	return(ret);
812}
813
814static int
815contestedbroad(struct counters *cpu, int pos)
816{
817        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
818	int ret;
819	struct counters *mem;
820	struct counters *mem2;
821	struct counters *unhalt;
822	double con, un, memd, memtoo, res;
823
824	con = 84.0;
825	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
826	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
827	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
828
829	if (pos != -1) {
830		memd = mem->vals[pos] * 1.0;
831		memtoo = mem2->vals[pos] * 1.0;
832		un = unhalt->vals[pos] * 1.0;
833	} else {
834		memd = mem->sum * 1.0;
835		memtoo = mem2->sum * 1.0;
836		un = unhalt->sum * 1.0;
837	}
838	res = ((memd * con) + memtoo)/un;
839	ret = printf("%1.3f", res);
840	return(ret);
841}
842
843
844static int
845blockstoreforward(struct counters *cpu, int pos)
846{
847        /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
848	int ret;
849	struct counters *ldb;
850	struct counters *unhalt;
851	double con, un, ld, res;
852
853	con = 13.0;
854	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
855	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
856	if (pos != -1) {
857		ld = ldb->vals[pos] * 1.0;
858		un = unhalt->vals[pos] * 1.0;
859	} else {
860		ld = ldb->sum * 1.0;
861		un = unhalt->sum * 1.0;
862	}
863	res = (ld * con)/un;
864	ret = printf("%1.3f", res);
865	return(ret);
866}
867
868static int
869cache2(struct counters *cpu, int pos)
870{
871	/* ** Suspect ***
872	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
873	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
874	 */
875	int ret;
876	struct counters *mem1, *mem2, *mem3;
877	struct counters *unhalt;
878	double con1, con2, con3, un, me_1, me_2, me_3, res;
879
880	con1 = 26.0;
881	con2 = 43.0;
882	con3 = 60.0;
883	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
884/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
885	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
886	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
887	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
888	if (pos != -1) {
889		me_1 = mem1->vals[pos] * 1.0;
890		me_2 = mem2->vals[pos] * 1.0;
891		me_3 = mem3->vals[pos] * 1.0;
892		un = unhalt->vals[pos] * 1.0;
893	} else {
894		me_1 = mem1->sum * 1.0;
895		me_2 = mem2->sum * 1.0;
896		me_3 = mem3->sum * 1.0;
897		un = unhalt->sum * 1.0;
898	}
899	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
900	ret = printf("%1.3f", res);
901	return(ret);
902}
903
904static int
905datasharing(struct counters *cpu, int pos)
906{
907	/*
908	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
909	 */
910	int ret;
911	struct counters *mem;
912	struct counters *unhalt;
913	double con, res, me, un;
914
915	con = 43.0;
916	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
917	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
918	if (pos != -1) {
919		me = mem->vals[pos] * 1.0;
920		un = unhalt->vals[pos] * 1.0;
921	} else {
922		me = mem->sum * 1.0;
923		un = unhalt->sum * 1.0;
924	}
925	res = (me * con)/un;
926	ret = printf("%1.3f", res);
927	return(ret);
928
929}
930
931
932static int
933datasharing_has(struct counters *cpu, int pos)
934{
935	/*
936	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
937	 */
938	int ret;
939	struct counters *mem;
940	struct counters *unhalt;
941	double con, res, me, un;
942
943	con = 72.0;
944	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
945	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
946	if (pos != -1) {
947		me = mem->vals[pos] * 1.0;
948		un = unhalt->vals[pos] * 1.0;
949	} else {
950		me = mem->sum * 1.0;
951		un = unhalt->sum * 1.0;
952	}
953	res = (me * con)/un;
954	ret = printf("%1.3f", res);
955	return(ret);
956
957}
958
959
960static int
961cache2ib(struct counters *cpu, int pos)
962{
963        /*
964	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
965	 */
966	int ret;
967	struct counters *mem;
968	struct counters *unhalt;
969	double con, un, me, res;
970
971	con = 29.0;
972	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
973	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
974	if (pos != -1) {
975		me = mem->vals[pos] * 1.0;
976		un = unhalt->vals[pos] * 1.0;
977	} else {
978		me = mem->sum * 1.0;
979		un = unhalt->sum * 1.0;
980	}
981	res = (con * me)/un;
982	ret = printf("%1.3f", res);
983	return(ret);
984}
985
986static int
987cache2has(struct counters *cpu, int pos)
988{
989	/*
990	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
991	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
992	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
993	 *           / CPU_CLK_UNHALTED.THREAD_P
994	 */
995	int ret;
996	struct counters *mem1, *mem2, *mem3;
997	struct counters *unhalt;
998	double con1, con2, con3, un, me1, me2, me3, res;
999
1000	con1 = 36.0;
1001	con2 = 72.0;
1002	con3 = 84.0;
1003	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1004	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1005	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1006	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1007	if (pos != -1) {
1008		me1 = mem1->vals[pos] * 1.0;
1009		me2 = mem2->vals[pos] * 1.0;
1010		me3 = mem3->vals[pos] * 1.0;
1011		un = unhalt->vals[pos] * 1.0;
1012	} else {
1013		me1 = mem1->sum * 1.0;
1014		me2 = mem2->sum * 1.0;
1015		me3 = mem3->sum * 1.0;
1016		un = unhalt->sum * 1.0;
1017	}
1018	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1019	ret = printf("%1.3f", res);
1020	return(ret);
1021}
1022
1023
1024static int
1025cache2broad(struct counters *cpu, int pos)
1026{
1027        /*
1028	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1029	 */
1030	int ret;
1031	struct counters *mem;
1032	struct counters *unhalt;
1033	double con, un, me, res;
1034
1035	con = 36.0;
1036	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1037	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1038	if (pos != -1) {
1039		me = mem->vals[pos] * 1.0;
1040		un = unhalt->vals[pos] * 1.0;
1041	} else {
1042		me = mem->sum * 1.0;
1043		un = unhalt->sum * 1.0;
1044	}
1045	res = (con * me)/un;
1046	ret = printf("%1.3f", res);
1047	return(ret);
1048}
1049
1050
1051static int
1052cache1(struct counters *cpu, int pos)
1053{
1054	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1055	int ret;
1056	struct counters *mem;
1057	struct counters *unhalt;
1058	double con, un, me, res;
1059
1060	con = 180.0;
1061	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1062	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1063	if (pos != -1) {
1064		me = mem->vals[pos] * 1.0;
1065		un = unhalt->vals[pos] * 1.0;
1066	} else {
1067		me = mem->sum * 1.0;
1068		un = unhalt->sum * 1.0;
1069	}
1070	res = (me * con)/un;
1071	ret = printf("%1.3f", res);
1072	return(ret);
1073}
1074
1075static int
1076cache1ib(struct counters *cpu, int pos)
1077{
1078	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1079	int ret;
1080	struct counters *mem;
1081	struct counters *unhalt;
1082	double con, un, me, res;
1083
1084	con = 180.0;
1085	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1086	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1087	if (pos != -1) {
1088		me = mem->vals[pos] * 1.0;
1089		un = unhalt->vals[pos] * 1.0;
1090	} else {
1091		me = mem->sum * 1.0;
1092		un = unhalt->sum * 1.0;
1093	}
1094	res = (me * con)/un;
1095	ret = printf("%1.3f", res);
1096	return(ret);
1097}
1098
1099
1100static int
1101cache1broad(struct counters *cpu, int pos)
1102{
1103	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1104	int ret;
1105	struct counters *mem;
1106	struct counters *unhalt;
1107	double con, un, me, res;
1108
1109	con = 180.0;
1110	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1111	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1112	if (pos != -1) {
1113		me = mem->vals[pos] * 1.0;
1114		un = unhalt->vals[pos] * 1.0;
1115	} else {
1116		me = mem->sum * 1.0;
1117		un = unhalt->sum * 1.0;
1118	}
1119	res = (me * con)/un;
1120	ret = printf("%1.3f", res);
1121	return(ret);
1122}
1123
1124
1125static int
1126dtlb_missload(struct counters *cpu, int pos)
1127{
1128	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1129	int ret;
1130	struct counters *dtlb_m, *dtlb_d;
1131	struct counters *unhalt;
1132	double con, un, d1, d2, res;
1133
1134	con = 7.0;
1135	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1136	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1137	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1138	if (pos != -1) {
1139		d1 = dtlb_m->vals[pos] * 1.0;
1140		d2 = dtlb_d->vals[pos] * 1.0;
1141		un = unhalt->vals[pos] * 1.0;
1142	} else {
1143		d1 = dtlb_m->sum * 1.0;
1144		d2 = dtlb_d->sum * 1.0;
1145		un = unhalt->sum * 1.0;
1146	}
1147	res = ((d1 * con) + d2)/un;
1148	ret = printf("%1.3f", res);
1149	return(ret);
1150}
1151
1152static int
1153dtlb_missstore(struct counters *cpu, int pos)
1154{
1155        /*
1156	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1157	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1158	 */
1159        int ret;
1160        struct counters *dtsb_m, *dtsb_d;
1161        struct counters *unhalt;
1162        double con, un, d1, d2, res;
1163
1164        con = 7.0;
1165        unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1166        dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1167        dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1168        if (pos != -1) {
1169                d1 = dtsb_m->vals[pos] * 1.0;
1170                d2 = dtsb_d->vals[pos] * 1.0;
1171                un = unhalt->vals[pos] * 1.0;
1172        } else {
1173                d1 = dtsb_m->sum * 1.0;
1174                d2 = dtsb_d->sum * 1.0;
1175                un = unhalt->sum * 1.0;
1176        }
1177        res = ((d1 * con) + d2)/un;
1178        ret = printf("%1.3f", res);
1179        return(ret);
1180}
1181
1182static int
1183itlb_miss(struct counters *cpu, int pos)
1184{
1185	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1186	int ret;
1187	struct counters *itlb;
1188	struct counters *unhalt;
1189	double un, d1, res;
1190
1191	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1192	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1193	if (pos != -1) {
1194		d1 = itlb->vals[pos] * 1.0;
1195		un = unhalt->vals[pos] * 1.0;
1196	} else {
1197		d1 = itlb->sum * 1.0;
1198		un = unhalt->sum * 1.0;
1199	}
1200	res = d1/un;
1201	ret = printf("%1.3f", res);
1202	return(ret);
1203}
1204
1205
1206static int
1207itlb_miss_broad(struct counters *cpu, int pos)
1208{
1209	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1210	int ret;
1211	struct counters *itlb;
1212	struct counters *unhalt;
1213	struct counters *four_k;
1214	double un, d1, res, k;
1215
1216	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1217	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1218	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1219	if (pos != -1) {
1220		d1 = itlb->vals[pos] * 1.0;
1221		un = unhalt->vals[pos] * 1.0;
1222		k = four_k->vals[pos] * 1.0;
1223	} else {
1224		d1 = itlb->sum * 1.0;
1225		un = unhalt->sum * 1.0;
1226		k = four_k->sum * 1.0;
1227	}
1228	res = (7.0 * k + d1)/un;
1229	ret = printf("%1.3f", res);
1230	return(ret);
1231}
1232
1233
1234static int
1235icache_miss(struct counters *cpu, int pos)
1236{
1237	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1238
1239	int ret;
1240	struct counters *itlb, *icache;
1241	struct counters *unhalt;
1242	double un, d1, ic, res;
1243
1244	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1245	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1246	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1247	if (pos != -1) {
1248		d1 = itlb->vals[pos] * 1.0;
1249		ic = icache->vals[pos] * 1.0;
1250		un = unhalt->vals[pos] * 1.0;
1251	} else {
1252		d1 = itlb->sum * 1.0;
1253		ic = icache->sum * 1.0;
1254		un = unhalt->sum * 1.0;
1255	}
1256	res = (ic-d1)/un;
1257	ret = printf("%1.3f", res);
1258	return(ret);
1259
1260}
1261
1262static int
1263icache_miss_has(struct counters *cpu, int pos)
1264{
1265	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1266
1267	int ret;
1268	struct counters *icache;
1269	struct counters *unhalt;
1270	double un, con, ic, res;
1271
1272	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273	icache = find_counter(cpu, "ICACHE.MISSES");
1274	con = 36.0;
1275	if (pos != -1) {
1276		ic = icache->vals[pos] * 1.0;
1277		un = unhalt->vals[pos] * 1.0;
1278	} else {
1279		ic = icache->sum * 1.0;
1280		un = unhalt->sum * 1.0;
1281	}
1282	res = (con * ic)/un;
1283	ret = printf("%1.3f", res);
1284	return(ret);
1285
1286}
1287
1288static int
1289lcp_stall(struct counters *cpu, int pos)
1290{
1291         /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1292	int ret;
1293	struct counters *ild;
1294	struct counters *unhalt;
1295	double un, d1, res;
1296
1297	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298	ild = find_counter(cpu, "ILD_STALL.LCP");
1299	if (pos != -1) {
1300		d1 = ild->vals[pos] * 1.0;
1301		un = unhalt->vals[pos] * 1.0;
1302	} else {
1303		d1 = ild->sum * 1.0;
1304		un = unhalt->sum * 1.0;
1305	}
1306	res = d1/un;
1307	ret = printf("%1.3f", res);
1308	return(ret);
1309
1310}
1311
1312
1313static int
1314frontendstall(struct counters *cpu, int pos)
1315{
1316      /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1317	int ret;
1318	struct counters *idq;
1319	struct counters *unhalt;
1320	double con, un, id, res;
1321
1322	con = 4.0;
1323	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1324	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1325	if (pos != -1) {
1326		id = idq->vals[pos] * 1.0;
1327		un = unhalt->vals[pos] * 1.0;
1328	} else {
1329		id = idq->sum * 1.0;
1330		un = unhalt->sum * 1.0;
1331	}
1332	res = id/(un * con);
1333	ret = printf("%1.3f", res);
1334	return(ret);
1335}
1336
1337static int
1338clears(struct counters *cpu, int pos)
1339{
1340	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1341	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1342
1343	int ret;
1344	struct counters *clr1, *clr2, *clr3;
1345	struct counters *unhalt;
1346	double con, un, cl1, cl2, cl3, res;
1347
1348	con = 100.0;
1349	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1350	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1351	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1352	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1353
1354	if (pos != -1) {
1355		cl1 = clr1->vals[pos] * 1.0;
1356		cl2 = clr2->vals[pos] * 1.0;
1357		cl3 = clr3->vals[pos] * 1.0;
1358		un = unhalt->vals[pos] * 1.0;
1359	} else {
1360		cl1 = clr1->sum * 1.0;
1361		cl2 = clr2->sum * 1.0;
1362		cl3 = clr3->sum * 1.0;
1363		un = unhalt->sum * 1.0;
1364	}
1365	res = ((cl1 + cl2 + cl3) * con)/un;
1366	ret = printf("%1.3f", res);
1367	return(ret);
1368}
1369
1370
1371
1372static int
1373clears_broad(struct counters *cpu, int pos)
1374{
1375	int ret;
1376	struct counters *clr1, *clr2, *clr3, *cyc;
1377	struct counters *unhalt;
1378	double con, un, cl1, cl2, cl3, cy, res;
1379
1380	con = 100.0;
1381	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1382	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1383	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1384	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1385	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1386	if (pos != -1) {
1387		cl1 = clr1->vals[pos] * 1.0;
1388		cl2 = clr2->vals[pos] * 1.0;
1389		cl3 = clr3->vals[pos] * 1.0;
1390		cy = cyc->vals[pos] * 1.0;
1391		un = unhalt->vals[pos] * 1.0;
1392	} else {
1393		cl1 = clr1->sum * 1.0;
1394		cl2 = clr2->sum * 1.0;
1395		cl3 = clr3->sum * 1.0;
1396		cy = cyc->sum * 1.0;
1397		un = unhalt->sum * 1.0;
1398	}
1399	/* Formula not listed but extrapulated to add the cy ?? */
1400	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1401	ret = printf("%1.3f", res);
1402	return(ret);
1403}
1404
1405
1406
1407
1408
1409static int
1410microassist(struct counters *cpu, int pos)
1411{
1412	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1413	int ret;
1414	struct counters *idq;
1415	struct counters *unhalt;
1416	double un, id, res, con;
1417
1418	con = 4.0;
1419	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1420	idq = find_counter(cpu, "IDQ.MS_UOPS");
1421	if (pos != -1) {
1422		id = idq->vals[pos] * 1.0;
1423		un = unhalt->vals[pos] * 1.0;
1424	} else {
1425		id = idq->sum * 1.0;
1426		un = unhalt->sum * 1.0;
1427	}
1428	res = id/(un * con);
1429	ret = printf("%1.3f", res);
1430	return(ret);
1431}
1432
1433
1434static int
1435microassist_broad(struct counters *cpu, int pos)
1436{
1437	int ret;
1438	struct counters *idq;
1439	struct counters *unhalt;
1440	struct counters *uopiss;
1441	struct counters *uopret;
1442	double un, id, res, con, uoi, uor;
1443
1444	con = 4.0;
1445	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1446	idq = find_counter(cpu, "IDQ.MS_UOPS");
1447	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1448	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1449	if (pos != -1) {
1450		id = idq->vals[pos] * 1.0;
1451		un = unhalt->vals[pos] * 1.0;
1452		uoi = uopiss->vals[pos] * 1.0;
1453		uor = uopret->vals[pos] * 1.0;
1454	} else {
1455		id = idq->sum * 1.0;
1456		un = unhalt->sum * 1.0;
1457		uoi = uopiss->sum * 1.0;
1458		uor = uopret->sum * 1.0;
1459	}
1460	res = (uor/uoi) * (id/(un * con));
1461	ret = printf("%1.3f", res);
1462	return(ret);
1463}
1464
1465
1466static int
1467aliasing(struct counters *cpu, int pos)
1468{
1469	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1470	int ret;
1471	struct counters *ld;
1472	struct counters *unhalt;
1473	double un, lds, con, res;
1474
1475	con = 5.0;
1476	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1477	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1478	if (pos != -1) {
1479		lds = ld->vals[pos] * 1.0;
1480		un = unhalt->vals[pos] * 1.0;
1481	} else {
1482		lds = ld->sum * 1.0;
1483		un = unhalt->sum * 1.0;
1484	}
1485	res = (lds * con)/un;
1486	ret = printf("%1.3f", res);
1487	return(ret);
1488}
1489
1490static int
1491aliasing_broad(struct counters *cpu, int pos)
1492{
1493	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1494	int ret;
1495	struct counters *ld;
1496	struct counters *unhalt;
1497	double un, lds, con, res;
1498
1499	con = 7.0;
1500	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1501	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1502	if (pos != -1) {
1503		lds = ld->vals[pos] * 1.0;
1504		un = unhalt->vals[pos] * 1.0;
1505	} else {
1506		lds = ld->sum * 1.0;
1507		un = unhalt->sum * 1.0;
1508	}
1509	res = (lds * con)/un;
1510	ret = printf("%1.3f", res);
1511	return(ret);
1512}
1513
1514
1515static int
1516fpassists(struct counters *cpu, int pos)
1517{
1518	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1519	int ret;
1520	struct counters *fp;
1521	struct counters *inst;
1522	double un, fpd, res;
1523
1524	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1525	fp = find_counter(cpu, "FP_ASSIST.ANY");
1526	if (pos != -1) {
1527		fpd = fp->vals[pos] * 1.0;
1528		un = inst->vals[pos] * 1.0;
1529	} else {
1530		fpd = fp->sum * 1.0;
1531		un = inst->sum * 1.0;
1532	}
1533	res = fpd/un;
1534	ret = printf("%1.3f", res);
1535	return(ret);
1536}
1537
1538static int
1539otherassistavx(struct counters *cpu, int pos)
1540{
1541	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1542	int ret;
1543	struct counters *oth;
1544	struct counters *unhalt;
1545	double un, ot, con, res;
1546
1547	con = 75.0;
1548	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1549	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1550	if (pos != -1) {
1551		ot = oth->vals[pos] * 1.0;
1552		un = unhalt->vals[pos] * 1.0;
1553	} else {
1554		ot = oth->sum * 1.0;
1555		un = unhalt->sum * 1.0;
1556	}
1557	res = (ot * con)/un;
1558	ret = printf("%1.3f", res);
1559	return(ret);
1560}
1561
1562static int
1563otherassistsse(struct counters *cpu, int pos)
1564{
1565
1566	int ret;
1567	struct counters *oth;
1568	struct counters *unhalt;
1569	double un, ot, con, res;
1570
1571	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1572	con = 75.0;
1573	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1574	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1575	if (pos != -1) {
1576		ot = oth->vals[pos] * 1.0;
1577		un = unhalt->vals[pos] * 1.0;
1578	} else {
1579		ot = oth->sum * 1.0;
1580		un = unhalt->sum * 1.0;
1581	}
1582	res = (ot * con)/un;
1583	ret = printf("%1.3f", res);
1584	return(ret);
1585}
1586
1587static int
1588efficiency1(struct counters *cpu, int pos)
1589{
1590
1591	int ret;
1592	struct counters *uops;
1593	struct counters *unhalt;
1594	double un, ot, con, res;
1595
1596        /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1597	con = 4.0;
1598	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1599	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1600	if (pos != -1) {
1601		ot = uops->vals[pos] * 1.0;
1602		un = unhalt->vals[pos] * 1.0;
1603	} else {
1604		ot = uops->sum * 1.0;
1605		un = unhalt->sum * 1.0;
1606	}
1607	res = ot/(con * un);
1608	ret = printf("%1.3f", res);
1609	return(ret);
1610}
1611
1612static int
1613efficiency2(struct counters *cpu, int pos)
1614{
1615
1616	int ret;
1617	struct counters *uops;
1618	struct counters *unhalt;
1619	double un, ot, res;
1620
1621        /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1622	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1623	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1624	if (pos != -1) {
1625		ot = uops->vals[pos] * 1.0;
1626		un = unhalt->vals[pos] * 1.0;
1627	} else {
1628		ot = uops->sum * 1.0;
1629		un = unhalt->sum * 1.0;
1630	}
1631	res = un/ot;
1632	ret = printf("%1.3f", res);
1633	return(ret);
1634}
1635
1636#define SANDY_BRIDGE_COUNT 20
1637static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1638/*01*/	{ "allocstall1", "thresh > .05",
1639	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1640	  allocstall1, 2 },
1641/* -- not defined for SB right (partial-rat_stalls) 02*/
1642        { "allocstall2", "thresh > .05",
1643	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1644	  allocstall2, 2 },
1645/*03*/	{ "br_miss", "thresh >= .2",
1646	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1647	  br_mispredict, 2 },
1648/*04*/	{ "splitload", "thresh >= .1",
1649	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1650	  splitload_sb, 2 },
1651/* 05*/	{ "splitstore", "thresh >= .01",
1652	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1653	  splitstore_sb, 2 },
1654/*06*/	{ "contested", "thresh >= .05",
1655	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1656	  contested, 2 },
1657/*07*/	{ "blockstorefwd", "thresh >= .05",
1658	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659	  blockstoreforward, 2 },
1660/*08*/	{ "cache2", "thresh >= .2",
1661	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1662	  cache2, 4 },
1663/*09*/	{ "cache1", "thresh >= .2",
1664	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665	  cache1, 2 },
1666/*10*/	{ "dtlbmissload", "thresh >= .1",
1667	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668	  dtlb_missload, 3 },
1669/*11*/	{ "dtlbmissstore", "thresh >= .05",
1670	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671	  dtlb_missstore, 3 },
1672/*12*/	{ "frontendstall", "thresh >= .15",
1673	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1674	  frontendstall, 2 },
1675/*13*/	{ "clears", "thresh >= .02",
1676	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677	  clears, 4 },
1678/*14*/	{ "microassist", "thresh >= .05",
1679	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680	  microassist, 2 },
1681/*15*/	{ "aliasing_4k", "thresh >= .1",
1682	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683	  aliasing, 2 },
1684/*16*/	{ "fpassist", "look for a excessive value",
1685	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1686	  fpassists, 2 },
1687/*17*/	{ "otherassistavx", "look for a excessive value",
1688	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1689	  otherassistavx, 2},
1690/*18*/	{ "otherassistsse", "look for a excessive value",
1691	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692	  otherassistsse, 2 },
1693/*19*/	{ "eff1", "thresh < .9",
1694	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1695	  efficiency1, 2 },
1696/*20*/	{ "eff2", "thresh > 1.0",
1697	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698	  efficiency2, 2 },
1699};
1700
1701
1702#define IVY_BRIDGE_COUNT 21
1703static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1704/*1*/	{ "eff1", "thresh < .75",
1705	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1706	  efficiency1, 2 },
1707/*2*/	{ "eff2", "thresh > 1.0",
1708	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709	  efficiency2, 2 },
1710/*3*/	{ "itlbmiss", "thresh > .05",
1711	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712	  itlb_miss, 2 },
1713/*4*/	{ "icachemiss", "thresh > .05",
1714	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715	  icache_miss, 3 },
1716/*5*/	{ "lcpstall", "thresh > .05",
1717	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718	  lcp_stall, 2 },
1719/*6*/	{ "cache1", "thresh >= .2",
1720	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721	  cache1ib, 2 },
1722/*7*/	{ "cache2", "thresh >= .2",
1723	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724	  cache2ib, 2 },
1725/*8*/	{ "contested", "thresh >= .05",
1726	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727	  contested, 2 },
1728/*9*/	{ "datashare", "thresh >= .05",
1729	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730	  datasharing, 2 },
1731/*10*/	{ "blockstorefwd", "thresh >= .05",
1732	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733	  blockstoreforward, 2 },
1734/*11*/	{ "splitload", "thresh >= .1",
1735	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1736	  splitloadib, 4 },
1737/*12*/	{ "splitstore", "thresh >= .01",
1738	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1739	  splitstore, 2 },
1740/*13*/	{ "aliasing_4k", "thresh >= .1",
1741	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1742	  aliasing, 2 },
1743/*14*/	{ "dtlbmissload", "thresh >= .1",
1744	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745	  dtlb_missload , 3},
1746/*15*/	{ "dtlbmissstore", "thresh >= .05",
1747	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748	  dtlb_missstore, 3 },
1749/*16*/	{ "br_miss", "thresh >= .2",
1750	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1751	  br_mispredictib, 8 },
1752/*17*/	{ "clears", "thresh >= .02",
1753	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1754	  clears, 4 },
1755/*18*/	{ "microassist", "thresh >= .05",
1756	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757	  microassist, 2 },
1758/*19*/	{ "fpassist", "look for a excessive value",
1759	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1760	  fpassists, 2 },
1761/*20*/	{ "otherassistavx", "look for a excessive value",
1762	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1763	  otherassistavx , 2},
1764/*21*/	{ "otherassistsse", "look for a excessive value",
1765	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766	  otherassistsse, 2 },
1767};
1768
1769#define HASWELL_COUNT 20
1770static struct cpu_entry haswell[HASWELL_COUNT] = {
1771/*1*/	{ "eff1", "thresh < .75",
1772	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1773	  efficiency1, 2 },
1774/*2*/	{ "eff2", "thresh > 1.0",
1775	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776	  efficiency2, 2 },
1777/*3*/	{ "itlbmiss", "thresh > .05",
1778	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779	  itlb_miss, 2 },
1780/*4*/	{ "icachemiss", "thresh > .05",
1781	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782	  icache_miss_has, 2 },
1783/*5*/	{ "lcpstall", "thresh > .05",
1784	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1785	  lcp_stall, 2 },
1786/*6*/	{ "cache1", "thresh >= .2",
1787	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788	  cache1ib, 2 },
1789/*7*/	{ "cache2", "thresh >= .2",
1790	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791	  cache2has, 4 },
1792/*8*/	{ "contested", "thresh >= .05",
1793	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794	  contested_has, 2 },
1795/*9*/	{ "datashare", "thresh >= .05",
1796	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797	  datasharing_has, 2 },
1798/*10*/	{ "blockstorefwd", "thresh >= .05",
1799	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1800	  blockstoreforward, 2 },
1801/*11*/	{ "splitload", "thresh >= .1",
1802	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1803	  splitload , 2},
1804/*12*/	{ "splitstore", "thresh >= .01",
1805	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1806	  splitstore, 2 },
1807/*13*/	{ "aliasing_4k", "thresh >= .1",
1808	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1809	  aliasing, 2 },
1810/*14*/	{ "dtlbmissload", "thresh >= .1",
1811	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812	  dtlb_missload, 3 },
1813/*15*/	{ "br_miss", "thresh >= .2",
1814	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1815	  br_mispredict, 2 },
1816/*16*/	{ "clears", "thresh >= .02",
1817	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1818	  clears, 4 },
1819/*17*/	{ "microassist", "thresh >= .05",
1820	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821	  microassist, 2 },
1822/*18*/	{ "fpassist", "look for a excessive value",
1823	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1824	  fpassists, 2 },
1825/*19*/	{ "otherassistavx", "look for a excessive value",
1826	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1827	  otherassistavx, 2 },
1828/*20*/	{ "otherassistsse", "look for a excessive value",
1829	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1830	  otherassistsse, 2 },
1831};
1832
1833
/*
 * Print a human readable description of the canned Broadwell expression
 * called "name": first the formula that is examined, then the threshold
 * at which the result is considered interesting.
 *
 * An unrecognised name is reported as "Unknown name" and the closing
 * sentence is still printed with the text "unknown entry".
 *
 * NOTE(review): the thresholds printed here for "datashare" and
 * "microassist" differ from the ones stored in the broadwell[] table
 * ("thresh >= .05" and "thresh >= .2" there) -- confirm which is intended.
 */
static void
explain_name_broad(const char *name)
{
	const char *mythresh;

	if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .75";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "itlbmiss") == 0) {
		/* Event name spelled as in the broadwell[] command line. */
		printf("Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "br_miss") == 0) {
		/* Event names corrected: RETIRED, ALL_BRANCHES and UOPS_ISSUED. */
		printf("Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
		printf(" (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
		printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf("          MACHINE_CLEARS.SMC + \n");
		printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1902
1903
1904#define BROADWELL_COUNT 17
1905static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1906/*1*/	{ "eff1", "thresh < .75",
1907	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1908	  efficiency1, 2 },
1909/*2*/	{ "eff2", "thresh > 1.0",
1910	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911	  efficiency2, 2 },
1912/*3*/	{ "itlbmiss", "thresh > .05",
1913	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1914	  itlb_miss_broad, 3 },
1915/*4*/	{ "icachemiss", "thresh > .05",
1916	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1917	  icache_miss_has, 2 },
1918/*5*/	{ "lcpstall", "thresh > .05",
1919	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920	  lcp_stall, 2 },
1921/*6*/	{ "cache1", "thresh >= .1",
1922	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923	  cache1broad, 2 },
1924/*7*/	{ "cache2", "thresh >= .2",
1925	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926	  cache2broad, 2 },
1927/*8*/	{ "contested", "thresh >= .05",
1928	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1929	  contestedbroad, 2 },
1930/*9*/	{ "datashare", "thresh >= .05",
1931	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1932	  datasharing_has, 2 },
1933/*10*/	{ "blockstorefwd", "thresh >= .05",
1934	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935	  blockstoreforward, 2 },
1936/*11*/	{ "aliasing_4k", "thresh >= .1",
1937	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938	  aliasing_broad, 2 },
1939/*12*/	{ "dtlbmissload", "thresh >= .1",
1940	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941	  dtlb_missload, 3 },
1942/*13*/	{ "br_miss", "thresh >= .2",
1943	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1944	  br_mispredict_broad, 7 },
1945/*14*/	{ "clears", "thresh >= .02",
1946	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1947	  clears_broad, 5 },
1948/*15*/	{ "fpassist", "look for a excessive value",
1949	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1950	  fpassists, 2 },
1951/*16*/	{ "otherassistavx", "look for a excessive value",
1952	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1953	  otherassistavx, 2 },
1954/*17*/	{ "microassist", "thresh >= .2",
1955	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1956	  microassist_broad, 4 },
1957};
1958
1959
1960static void
1961set_sandybridge(void)
1962{
1963	strcpy(the_cpu.cputype, "SandyBridge PMC");
1964	the_cpu.number = SANDY_BRIDGE_COUNT;
1965	the_cpu.ents = sandy_bridge;
1966	the_cpu.explain = explain_name_sb;
1967}
1968
1969static void
1970set_ivybridge(void)
1971{
1972	strcpy(the_cpu.cputype, "IvyBridge PMC");
1973	the_cpu.number = IVY_BRIDGE_COUNT;
1974	the_cpu.ents = ivy_bridge;
1975	the_cpu.explain = explain_name_ib;
1976}
1977
1978
1979static void
1980set_haswell(void)
1981{
1982	strcpy(the_cpu.cputype, "HASWELL PMC");
1983	the_cpu.number = HASWELL_COUNT;
1984	the_cpu.ents = haswell;
1985	the_cpu.explain = explain_name_has;
1986}
1987
1988
1989static void
1990set_broadwell(void)
1991{
1992	strcpy(the_cpu.cputype, "HASWELL PMC");
1993	the_cpu.number = BROADWELL_COUNT;
1994	the_cpu.ents = broadwell;
1995	the_cpu.explain = explain_name_broad;
1996}
1997
1998
1999static int
2000set_expression(const char *name)
2001{
2002	int found = 0, i;
2003	for(i=0 ; i< the_cpu.number; i++) {
2004		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2005			found = 1;
2006			expression = the_cpu.ents[i].func;
2007			command = the_cpu.ents[i].command;
2008			threshold = the_cpu.ents[i].thresh;
2009			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2010				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2011				       the_cpu.ents[i].name,
2012				       the_cpu.ents[i].counters_required, max_pmc_counters);
2013				printf("Sorry this test can not be run\n");
2014				if (run_all == 0) {
2015					exit(-1);
2016				} else {
2017					return(-1);
2018				}
2019			}
2020			break;
2021		}
2022	}
2023	if (!found) {
2024		printf("For CPU type %s we have no expression:%s\n",
2025		       the_cpu.cputype, name);
2026		exit(-1);
2027	}
2028	return(0);
2029}
2030
2031
2032
2033
2034
2035static int
2036validate_expression(char *name)
2037{
2038	int i, found;
2039
2040	found = 0;
2041	for(i=0 ; i< the_cpu.number; i++) {
2042		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2043			found = 1;
2044			break;
2045		}
2046	}
2047	if (!found) {
2048		return(-1);
2049	}
2050	return (0);
2051}
2052
2053static void
2054do_expression(struct counters *cpu, int pos)
2055{
2056	if (expression == NULL)
2057		return;
2058	(*expression)(cpu, pos);
2059}
2060
2061static void
2062process_header(int idx, char *p)
2063{
2064	struct counters *up;
2065	int i, len, nlen;
2066	/*
2067	 * Given header element idx, at p in
2068	 * form 's/NN/nameof'
2069	 * process the entry to pull out the name and
2070	 * the CPU number.
2071	 */
2072	if (strncmp(p, "s/", 2)) {
2073		printf("Check -- invalid header no s/ in %s\n",
2074		       p);
2075		return;
2076	}
2077	up = &cnts[idx];
2078	up->cpu = strtol(&p[2], NULL, 10);
2079	len = strlen(p);
2080	for (i=2; i<len; i++) {
2081		if (p[i] == '/') {
2082			nlen = strlen(&p[(i+1)]);
2083			if (nlen < (MAX_NLEN-1)) {
2084				strcpy(up->counter_name, &p[(i+1)]);
2085			} else {
2086				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2087			}
2088		}
2089	}
2090}
2091
2092static void
2093build_counters_from_header(FILE *io)
2094{
2095	char buffer[8192], *p;
2096	int i, len, cnt;
2097	size_t mlen;
2098
2099	/* We have a new start, lets
2100	 * setup our headers and cpus.
2101	 */
2102	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2103		printf("First line can't be read from file err:%d\n", errno);
2104		return;
2105	}
2106	/*
2107	 * Ok output is an array of counters. Once
2108	 * we start to read the values in we must
2109	 * put them in there slot to match there CPU and
2110	 * counter being updated. We create a mass array
2111	 * of the counters, filling in the CPU and
2112	 * counter name.
2113	 */
2114	/* How many do we get? */
2115	len = strlen(buffer);
2116	for (i=0, cnt=0; i<len; i++) {
2117		if (strncmp(&buffer[i], "s/", 2) == 0) {
2118			cnt++;
2119			for(;i<len;i++) {
2120				if (buffer[i] == ' ')
2121					break;
2122			}
2123		}
2124	}
2125	mlen = sizeof(struct counters) * cnt;
2126	cnts = malloc(mlen);
2127	ncnts = cnt;
2128	if (cnts == NULL) {
2129		printf("No memory err:%d\n", errno);
2130		return;
2131	}
2132	memset(cnts, 0, mlen);
2133	for (i=0, cnt=0; i<len; i++) {
2134		if (strncmp(&buffer[i], "s/", 2) == 0) {
2135			p = &buffer[i];
2136			for(;i<len;i++) {
2137				if (buffer[i] == ' ') {
2138					buffer[i] = 0;
2139					break;
2140				}
2141			}
2142			process_header(cnt, p);
2143			cnt++;
2144		}
2145	}
2146	if (verbose)
2147		printf("We have %d entries\n", cnt);
2148}
2149extern int max_to_collect;
2150int max_to_collect = MAX_COUNTER_SLOTS;
2151
2152static int
2153read_a_line(FILE *io)
2154{
2155	char buffer[8192], *p, *stop;
2156	int pos, i;
2157
2158	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2159		return(0);
2160	}
2161	p = buffer;
2162	for (i=0; i<ncnts; i++) {
2163		pos = cnts[i].pos;
2164		cnts[i].vals[pos] = strtol(p, &stop, 0);
2165		cnts[i].pos++;
2166		cnts[i].sum += cnts[i].vals[pos];
2167		p = stop;
2168	}
2169	return (1);
2170}
2171
2172extern int cpu_count_out;
2173int cpu_count_out=0;
2174
2175static void
2176print_header(void)
2177{
2178	int i, cnt, printed_cnt;
2179
2180	printf("*********************************\n");
2181	for(i=0, cnt=0; i<MAX_CPU; i++) {
2182		if (glob_cpu[i]) {
2183			cnt++;
2184		}
2185	}
2186	cpu_count_out = cnt;
2187	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2188		if (glob_cpu[i]) {
2189			printf("CPU%d", i);
2190			printed_cnt++;
2191		}
2192		if (printed_cnt == cnt) {
2193			printf("\n");
2194			break;
2195		} else {
2196			printf("\t");
2197		}
2198	}
2199}
2200
2201static void
2202lace_cpus_together(void)
2203{
2204	int i, j, lace_cpu;
2205	struct counters *cpat, *at;
2206
2207	for(i=0; i<ncnts; i++) {
2208		cpat = &cnts[i];
2209		if (cpat->next_cpu) {
2210			/* Already laced in */
2211			continue;
2212		}
2213		lace_cpu = cpat->cpu;
2214		if (lace_cpu >= MAX_CPU) {
2215			printf("CPU %d to big\n", lace_cpu);
2216			continue;
2217		}
2218		if (glob_cpu[lace_cpu] == NULL) {
2219			glob_cpu[lace_cpu] = cpat;
2220		} else {
2221			/* Already processed this cpu */
2222			continue;
2223		}
2224		/* Ok look forward for cpu->cpu and link in */
2225		for(j=(i+1); j<ncnts; j++) {
2226			at = &cnts[j];
2227			if (at->next_cpu) {
2228				continue;
2229			}
2230			if (at->cpu == lace_cpu) {
2231				/* Found one */
2232				cpat->next_cpu = at;
2233				cpat = at;
2234			}
2235		}
2236	}
2237}
2238
2239
2240static void
2241process_file(char *filename)
2242{
2243	FILE *io;
2244	int i;
2245	int line_at, not_done;
2246	pid_t pid_of_command=0;
2247
2248	if (filename ==  NULL) {
2249		io = my_popen(command, "r", &pid_of_command);
2250		if (io == NULL) {
2251			printf("Can't popen the command %s\n", command);
2252			return;
2253		}
2254	} else {
2255		io = fopen(filename, "r");
2256		if (io == NULL) {
2257			printf("Can't process file %s err:%d\n",
2258			       filename, errno);
2259			return;
2260		}
2261	}
2262	build_counters_from_header(io);
2263	if (cnts == NULL) {
2264		/* Nothing we can do */
2265		printf("Nothing to do -- no counters built\n");
2266		if (filename) {
2267			fclose(io);
2268		} else {
2269			my_pclose(io, pid_of_command);
2270		}
2271		return;
2272	}
2273	lace_cpus_together();
2274	print_header();
2275	if (verbose) {
2276		for (i=0; i<ncnts; i++) {
2277			printf("Counter:%s cpu:%d index:%d\n",
2278			       cnts[i].counter_name,
2279			       cnts[i].cpu, i);
2280		}
2281	}
2282	line_at = 0;
2283	not_done = 1;
2284	while(not_done) {
2285		if (read_a_line(io)) {
2286			line_at++;
2287		} else {
2288			break;
2289		}
2290		if (line_at >= max_to_collect) {
2291			not_done = 0;
2292		}
2293		if (filename == NULL) {
2294			int cnt;
2295			/* For the ones we dynamically open we print now */
2296			for(i=0, cnt=0; i<MAX_CPU; i++) {
2297				do_expression(glob_cpu[i], (line_at-1));
2298				cnt++;
2299				if (cnt == cpu_count_out) {
2300					printf("\n");
2301					break;
2302				} else {
2303					printf("\t");
2304				}
2305			}
2306		}
2307	}
2308	if (filename) {
2309		fclose(io);
2310	} else {
2311		my_pclose(io, pid_of_command);
2312	}
2313}
2314#if defined(__amd64__)
2315#define cpuid(in,a,b,c,d)\
2316  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2317
2318static __inline void
2319do_cpuid(u_int ax, u_int cx, u_int *p)
2320{
2321	__asm __volatile("cpuid"
2322			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2323			 :  "0" (ax), "c" (cx) );
2324}
2325
2326#else
2327#define cpuid(in, a, b, c, d)
2328#define do_cpuid(ax, cx, p)
2329#endif
2330
2331static void
2332get_cpuid_set(void)
2333{
2334	unsigned long eax, ebx, ecx, edx;
2335	int model;
2336	pid_t pid_of_command=0;
2337	size_t sz, len;
2338	FILE *io;
2339	char linebuf[1024], *str;
2340	u_int reg[4];
2341
2342	eax = ebx = ecx = edx = 0;
2343
2344	cpuid(0, eax, ebx, ecx, edx);
2345	if (ebx == 0x68747541) {
2346		printf("AMD processors are not supported by this program\n");
2347		printf("Sorry\n");
2348		exit(0);
2349	} else if (ebx == 0x6972794) {
2350		printf("Cyrix processors are not supported by this program\n");
2351		printf("Sorry\n");
2352		exit(0);
2353	} else if (ebx == 0x756e6547) {
2354		printf("Genuine Intel\n");
2355	} else {
2356		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2357		exit(0);
2358	}
2359	cpuid(1, eax, ebx, ecx, edx);
2360	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2361	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2362	switch (eax & 0xF00) {
2363	case 0x500:		/* Pentium family processors */
2364		printf("Intel Pentium P5\n");
2365		goto not_supported;
2366		break;
2367	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2368		switch (model) {
2369		case 0x1:
2370			printf("Intel Pentium P6\n");
2371			goto not_supported;
2372			break;
2373		case 0x3:
2374		case 0x5:
2375			printf("Intel PII\n");
2376			goto not_supported;
2377			break;
2378		case 0x6: case 0x16:
2379			printf("Intel CL\n");
2380			goto not_supported;
2381			break;
2382		case 0x7: case 0x8: case 0xA: case 0xB:
2383			printf("Intel PIII\n");
2384			goto not_supported;
2385			break;
2386		case 0x9: case 0xD:
2387			printf("Intel PM\n");
2388			goto not_supported;
2389			break;
2390		case 0xE:
2391			printf("Intel CORE\n");
2392			goto not_supported;
2393			break;
2394		case 0xF:
2395			printf("Intel CORE2\n");
2396			goto not_supported;
2397			break;
2398		case 0x17:
2399			printf("Intel CORE2EXTREME\n");
2400			goto not_supported;
2401			break;
2402		case 0x1C:	/* Per Intel document 320047-002. */
2403			printf("Intel ATOM\n");
2404			goto not_supported;
2405			break;
2406		case 0x1A:
2407		case 0x1E:	/*
2408				 * Per Intel document 253669-032 9/2009,
2409				 * pages A-2 and A-57
2410				 */
2411		case 0x1F:	/*
2412				 * Per Intel document 253669-032 9/2009,
2413				 * pages A-2 and A-57
2414				 */
2415			printf("Intel COREI7\n");
2416			goto not_supported;
2417			break;
2418		case 0x2E:
2419			printf("Intel NEHALEM\n");
2420			goto not_supported;
2421			break;
2422		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2423		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2424			printf("Intel WESTMERE\n");
2425			goto not_supported;
2426			break;
2427		case 0x2F:	/* Westmere-EX, seen in wild */
2428			printf("Intel WESTMERE\n");
2429			goto not_supported;
2430			break;
2431		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2432			printf("Intel SANDYBRIDGE\n");
2433			set_sandybridge();
2434			break;
2435		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2436			printf("Intel SANDYBRIDGE_XEON\n");
2437			set_sandybridge();
2438			break;
2439		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2440			printf("Intel IVYBRIDGE\n");
2441			set_ivybridge();
2442			break;
2443		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2444			printf("Intel IVYBRIDGE_XEON\n");
2445			set_ivybridge();
2446			break;
2447		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2448			printf("Intel HASWELL (Xeon)\n");
2449			set_haswell();
2450			break;
2451		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2452		case 0x45:
2453		case 0x46:
2454			printf("Intel HASWELL\n");
2455			set_haswell();
2456			break;
2457
2458		case 0x4e:
2459		case 0x5e:
2460			printf("Intel SKY-LAKE\n");
2461			goto not_supported;
2462			break;
2463		case 0x3D:
2464		case 0x47:
2465			printf("Intel BROADWELL\n");
2466			set_broadwell();
2467			break;
2468		case 0x4f:
2469		case 0x56:
2470			printf("Intel BROADWEL (Xeon)\n");
2471			set_broadwell();
2472			break;
2473
2474		case 0x4D:
2475			/* Per Intel document 330061-001 01/2014. */
2476			printf("Intel ATOM_SILVERMONT\n");
2477			goto not_supported;
2478			break;
2479		default:
2480			printf("Intel model 0x%x is not known -- sorry\n",
2481			       model);
2482			goto not_supported;
2483			break;
2484		}
2485		break;
2486	case 0xF00:		/* P4 */
2487		printf("Intel unknown model %d\n", model);
2488		goto not_supported;
2489		break;
2490	}
2491	do_cpuid(0xa, 0, reg);
2492	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2493	printf("We have %d PMC counters to work with\n", max_pmc_counters);
2494	/* Ok lets load the list of all known PMC's */
2495	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2496	if (valid_pmcs == NULL) {
2497		/* Likely */
2498		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2499		sz = sizeof(char *) * pmc_allocated_cnt;
2500		valid_pmcs = malloc(sz);
2501		if (valid_pmcs == NULL) {
2502			printf("No memory allocation fails at startup?\n");
2503			exit(-1);
2504		}
2505		memset(valid_pmcs, 0, sz);
2506	}
2507
2508	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2509		if (linebuf[0] != '\t') {
2510			/* sometimes headers ;-) */
2511			continue;
2512		}
2513		len = strlen(linebuf);
2514		if (linebuf[(len-1)] == '\n') {
2515			/* Likely */
2516			linebuf[(len-1)] = 0;
2517		}
2518		str = &linebuf[1];
2519		len = strlen(str) + 1;
2520		valid_pmcs[valid_pmc_cnt] = malloc(len);
2521		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2522			printf("No memory2 allocation fails at startup?\n");
2523			exit(-1);
2524		}
2525		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2526		strcpy(valid_pmcs[valid_pmc_cnt], str);
2527		valid_pmc_cnt++;
2528		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2529			/* Got to expand -- unlikely */
2530			char **more;
2531
2532			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2533			more = malloc(sz);
2534			if (more == NULL) {
2535				printf("No memory3 allocation fails at startup?\n");
2536				exit(-1);
2537			}
2538			memset(more, 0, sz);
2539			memcpy(more, valid_pmcs, sz);
2540			pmc_allocated_cnt *= 2;
2541			free(valid_pmcs);
2542			valid_pmcs = more;
2543		}
2544	}
2545	my_pclose(io, pid_of_command);
2546	return;
2547not_supported:
2548	printf("Not supported\n");
2549	exit(-1);
2550}
2551
2552static void
2553explain_all(void)
2554{
2555	int i;
2556	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2557	printf("-------------------------------------------------------------\n");
2558	for(i=0; i<the_cpu.number; i++){
2559		printf("For -e %s ", the_cpu.ents[i].name);
2560		(*the_cpu.explain)(the_cpu.ents[i].name);
2561		printf("----------------------------\n");
2562	}
2563}
2564
/*
 * Try to run pmcstat with the single PMC "pmc" and print "Pass" followed
 * by the first data line, or "Failed" with a reason.
 *
 * out_so_far is the number of characters already printed on the current
 * output line; the result is padded out to column 50.
 *
 * The test passes when the first line of pmcstat output contains the PMC
 * name (searching from offset 2) before it contains "ERROR".
 *
 * Fixes relative to the earlier version:
 *  - the command is built with a bounded snprintf() and the PMC name is
 *    compared directly instead of being copied into a second buffer;
 *  - leading blanks of the data line are skipped one at a time (the old
 *    loop advanced two characters per blank and could drop a digit);
 *  - a name that ends exactly at the end of the line is now found.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, j, k;

	for(i=out_so_far; i<50; i++) {
		printf(" ");
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<=(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			/* Header accepted, show the first sample line */
			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				/* The line still carries its own newline */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2637
/*
 * Add a private copy of "name" to the string list "vars", which currently
 * holds cur_cnt entries, unless an equal string is already present.
 *
 * Returns 1 when a new entry was stored in vars[cur_cnt] and 0 when the
 * name was already in the list.  Exits if vars[cur_cnt] is unexpectedly
 * occupied or memory cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **scan, **slot;
	size_t need;

	/* Duplicate check over the entries stored so far. */
	for (scan = vars; scan < vars + cur_cnt; scan++) {
		if (strcmp(*scan, name) == 0) {
			return(0);
		}
	}

	slot = &vars[cur_cnt];
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, *slot);
		exit(-1);
	}

	/* New name: store a copy including its terminator. */
	need = strlen(name) + 1;
	*slot = malloc(need);
	if (*slot == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(*slot, name, need);
	return(1);
}
2665
2666static char *
2667build_command_for_exp(struct expression *exp)
2668{
2669	/*
2670	 * Build the pmcstat command to handle
2671	 * the passed in expression.
2672	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2673	 * where NNN and QQQ represent the PMC's in the expression
2674	 * uniquely..
2675	 */
2676	char forming[1024];
2677	int cnt_pmc, alloced_pmcs, i;
2678	struct expression *at;
2679	char **vars, *cmd;
2680	size_t mal;
2681
2682	alloced_pmcs = cnt_pmc = 0;
2683	/* first how many do we have */
2684	at = exp;
2685	while (at) {
2686		if (at->type == TYPE_VALUE_PMC) {
2687			cnt_pmc++;
2688		}
2689		at = at->next;
2690	}
2691	if (cnt_pmc == 0) {
2692		printf("No PMC's in your expression -- nothing to do!!\n");
2693		exit(0);
2694	}
2695	mal = cnt_pmc * sizeof(char *);
2696	vars = malloc(mal);
2697	if (vars == NULL) {
2698		printf("No memory\n");
2699		exit(-1);
2700	}
2701	memset(vars, 0, mal);
2702	at = exp;
2703	while (at) {
2704		if (at->type == TYPE_VALUE_PMC) {
2705			if(add_it_to(vars, alloced_pmcs, at->name)) {
2706				alloced_pmcs++;
2707			}
2708		}
2709		at = at->next;
2710	}
2711	/* Now we have a unique list in vars so create our command */
2712	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2713	for(i=0; i<alloced_pmcs; i++) {
2714		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2715	}
2716	cmd = malloc((mal+2));
2717	if (cmd == NULL) {
2718		printf("%s out of mem\n", __FUNCTION__);
2719		exit(-1);
2720	}
2721	memset(cmd, 0, (mal+2));
2722	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2723	at = exp;
2724	for(i=0; i<alloced_pmcs; i++) {
2725		sprintf(forming, " -s %s", vars[i]);
2726		strcat(cmd, forming);
2727		free(vars[i]);
2728		vars[i] = NULL;
2729	}
2730	free(vars);
2731	return(cmd);
2732}
2733
2734static int
2735user_expr(struct counters *cpu, int pos)
2736{
2737	int ret;
2738	double res;
2739	struct counters *var;
2740	struct expression *at;
2741
2742	at = master_exp;
2743	while (at) {
2744		if (at->type == TYPE_VALUE_PMC) {
2745			var = find_counter(cpu, at->name);
2746			if (var == NULL) {
2747				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2748				exit(-1);
2749			}
2750			if (pos != -1) {
2751				at->value = var->vals[pos] * 1.0;
2752			} else {
2753				at->value = var->sum * 1.0;
2754			}
2755		}
2756		at = at->next;
2757	}
2758	res = run_expr(master_exp, 1, NULL);
2759	ret = printf("%1.3f", res);
2760	return(ret);
2761}
2762
2763
2764static void
2765set_manual_exp(struct expression *exp)
2766{
2767	expression = user_expr;
2768	command = build_command_for_exp(exp);
2769	threshold = "User defined threshold";
2770}
2771
2772static void
2773run_tests(void)
2774{
2775	int i, lenout;
2776	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2777	printf("------------------------------------------------------------------------\n");
2778	for(i=0; i<valid_pmc_cnt; i++) {
2779		lenout = printf("%s", valid_pmcs[i]);
2780		fflush(stdout);
2781		test_for_a_pmc(valid_pmcs[i], lenout);
2782	}
2783}
2784static void
2785list_all(void)
2786{
2787	int i, cnt, j;
2788	printf("PMC                                               Abbreviation\n");
2789	printf("--------------------------------------------------------------\n");
2790	for(i=0; i<valid_pmc_cnt; i++) {
2791		cnt = printf("%s", valid_pmcs[i]);
2792		for(j=cnt; j<52; j++) {
2793			printf(" ");
2794		}
2795		printf("%%%d\n", i);
2796	}
2797}
2798
2799
2800int
2801main(int argc, char **argv)
2802{
2803	int i, j, cnt;
2804	char *filename=NULL;
2805	const char *name=NULL;
2806	int help_only = 0;
2807	int test_mode = 0;
2808	int test_at = 0;
2809
2810	get_cpuid_set();
2811	memset(glob_cpu, 0, sizeof(glob_cpu));
2812	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2813		switch (i) {
2814		case 'A':
2815			run_all = 1;
2816			break;
2817		case 'L':
2818			list_all();
2819			return(0);
2820		case 'H':
2821			printf("**********************************\n");
2822			explain_all();
2823			printf("**********************************\n");
2824			return(0);
2825			break;
2826		case 'T':
2827			test_mode = 1;
2828			break;
2829		case 'E':
2830			master_exp = parse_expression(optarg);
2831			if (master_exp) {
2832				set_manual_exp(master_exp);
2833			}
2834			break;
2835		case 'e':
2836			if (validate_expression(optarg)) {
2837				printf("Unknown expression %s\n", optarg);
2838				return(0);
2839			}
2840			name = optarg;
2841			set_expression(optarg);
2842			break;
2843		case 'm':
2844			max_to_collect = strtol(optarg, NULL, 0);
2845			if (max_to_collect > MAX_COUNTER_SLOTS) {
2846				/* You can't collect more than max in array */
2847				max_to_collect = MAX_COUNTER_SLOTS;
2848			}
2849			break;
2850		case 'v':
2851			verbose++;
2852			break;
2853		case 'h':
2854			help_only = 1;
2855			break;
2856		case 'i':
2857			filename = optarg;
2858			break;
2859		case '?':
2860		default:
2861		use:
2862			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2863			       argv[0]);
2864			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2865			printf("-v -- verbose dump debug type things -- you don't want this\n");
2866			printf("-m N -- maximum to collect is N measurements\n");
2867			printf("-e expr-name -- Do expression expr-name\n");
2868			printf("-E 'your expression' -- Do your expression\n");
2869			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2870			printf("-H -- Don't run anything, just explain all canned expressions\n");
2871			printf("-T -- Test all PMC's defined by this processor\n");
2872			printf("-A -- Run all canned tests\n");
2873			return(0);
2874			break;
2875		}
2876	}
2877	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2878	    (test_mode == 0) && (master_exp == NULL)) {
2879		printf("Without setting an expression we cannot dynamically gather information\n");
2880		printf("you must supply a filename (and you probably want verbosity)\n");
2881		goto use;
2882	}
2883	if (run_all && max_to_collect > 10) {
2884		max_to_collect = 3;
2885	}
2886	if (test_mode) {
2887		run_tests();
2888		return(0);
2889	}
2890	printf("*********************************\n");
2891	if ((master_exp == NULL) && name) {
2892		(*the_cpu.explain)(name);
2893	} else if (master_exp) {
2894		printf("Examine your expression ");
2895		print_exp(master_exp);
2896		printf("User defined threshold\n");
2897	}
2898	if (help_only) {
2899		return(0);
2900	}
2901	if (run_all) {
2902	more:
2903		name = the_cpu.ents[test_at].name;
2904		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2905		test_at++;
2906		if (set_expression(name) == -1) {
2907			if (test_at >= the_cpu.number) {
2908				goto done;
2909			} else
2910				goto more;
2911		}
2912
2913	}
2914	process_file(filename);
2915	if (verbose >= 2) {
2916		for (i=0; i<ncnts; i++) {
2917			printf("Counter:%s cpu:%d index:%d\n",
2918			       cnts[i].counter_name,
2919			       cnts[i].cpu, i);
2920			for(j=0; j<cnts[i].pos; j++) {
2921				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2922			}
2923			printf(" sum - %ld\n", (long int)cnts[i].sum);
2924		}
2925	}
2926	if (expression == NULL) {
2927		return(0);
2928	}
2929	if (max_to_collect > 1) {
2930		for(i=0, cnt=0; i<MAX_CPU; i++) {
2931			if (glob_cpu[i]) {
2932				do_expression(glob_cpu[i], -1);
2933				cnt++;
2934				if (cnt == cpu_count_out) {
2935					printf("\n");
2936					break;
2937				} else {
2938					printf("\t");
2939				}
2940			}
2941		}
2942	}
2943	if (run_all && (test_at < the_cpu.number)) {
2944		memset(glob_cpu, 0, sizeof(glob_cpu));
2945		ncnts = 0;
2946		printf("*********************************\n");
2947		goto more;
2948	} else if (run_all) {
2949	done:
2950		printf("*********************************\n");
2951	}
2952	return(0);
2953}
2954