1/*-
2 * Copyright (c) 2014, 2015 Netflix Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28#include <sys/types.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include <string.h>
33#include <strings.h>
34#include <sys/errno.h>
35#include <signal.h>
36#include <sys/wait.h>
37#include <getopt.h>
38#include "eval_expr.h"
39__FBSDID("$FreeBSD$");
40
/* Sizing limits used by the declarations in this file. */
#define MAX_COUNTER_SLOTS	1024
#define MAX_NLEN		64
#define MAX_CPU			64
#define PMC_INITIAL_ALLOC	512

/* File-local settings and their start-up values. */
static int max_pmc_counters = 1;
static int run_all = 0;
static int verbose = 0;

/* The process environment; my_popen() hands it to the shell it starts. */
extern char **environ;

/*
 * Each global below is declared extern and then defined on the next
 * line, so a declaration is always in scope ahead of the definition.
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
60
/*
 * The following two functions are variants of popen(3) and pclose(3),
 * with the caveat that my_popen() also gives you the child's PID so
 * that you can supply it to my_pclose(), which sends SIGTERM to the
 * process.
 */

/*
 * my_popen --
 *	Run `command' through "/bin/sh -c" with the child's stdin attached
 *	to one pipe and its stdout and stderr attached to another.
 *
 *	dir must be "r" or "w".  With "r" the returned stream reads what
 *	the child writes to stdout/stderr; with "w" the returned stream
 *	feeds the child's stdin.  The parent closes every pipe end it does
 *	not hand back, so in "w" mode nobody reads the child's output.
 *
 *	On success the stream is returned and the child's PID is stored in
 *	*p_pid.  On failure NULL is returned with errno describing the
 *	first failure, no descriptors are left open and *p_pid is not
 *	written.  errno is EINVAL when dir is neither "r" nor "w".
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *stream;
	const char *mode;
	int pdesin[2], pdesout[2];
	int keep_fd, saved_errno;
	pid_t pid;

	if (strcmp(dir, "r") != 0 && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);
	if (pipe(pdesout) < 0) {
		/* close() may overwrite errno; report the pipe() failure. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Wire up stdin, stdout and, just in case, stderr. */
		(void)dup2(pdesin[0], STDIN_FILENO);
		(void)dup2(pdesout[1], STDOUT_FILENO);
		(void)dup2(pdesout[1], STDERR_FILENO);
		if (pdesin[0] > STDERR_FILENO)
			(void)close(pdesin[0]);
		if (pdesout[1] > STDERR_FILENO)
			(void)close(pdesout[1]);
		/*
		 * execl() passes `command' straight through and keeps the
		 * current environment, so no fixed-size copy of the command
		 * line is needed.
		 */
		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
		/*
		 * Still the forked child: use _exit() so the stdio buffers
		 * inherited from the parent are not flushed a second time.
		 */
		_exit(127);
		/* NOTREACHED */
	default:			/* Parent. */
		break;
	}

	/* Keep only the pipe end the caller asked for. */
	if (strcmp(dir, "r") == 0) {
		keep_fd = pdesout[0];
		mode = "r";
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
	} else {
		keep_fd = pdesin[1];
		mode = "w";
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
	}

	stream = fdopen(keep_fd, mode);
	if (stream == NULL) {
		/* Do not leak the descriptor or leave the child unreaped. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	/* Store the pid */
	*p_pid = pid;
	return (stream);
}
145
/*
 * my_pclose --
 *	Close a stream obtained from my_popen(), send SIGTERM to the child
 *	identified by the_pid and wait until that child has been reaped.
 *	Nothing is returned; the child's exit status is discarded.
 *
 *	A NULL stream is ignored.  A pid of zero or less is never passed
 *	to kill(2), because there it addresses a process group or every
 *	process rather than a single child.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	pid_t reaped;

	if (io != NULL)
		(void)fclose(io);
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	/* Retry when a signal interrupts the wait. */
	do {
		reaped = waitpid(the_pid, NULL, 0);
	} while (reaped == -1 && errno == EINTR);
}
167
168struct counters {
169	struct counters *next_cpu;
170	char counter_name[MAX_NLEN];		/* Name of counter */
171	int cpu;				/* CPU we are on */
172	int pos;				/* Index we are filling to. */
173	uint64_t vals[MAX_COUNTER_SLOTS];	/* Last 64 entries */
174	uint64_t sum;				/* Summary of entries */
175};
176
177extern struct counters *glob_cpu[MAX_CPU];
178struct counters *glob_cpu[MAX_CPU];
179
180extern struct counters *cnts;
181struct counters *cnts=NULL;
182
183extern int ncnts;
184int ncnts=0;
185
186extern int (*expression)(struct counters *, int);
187int (*expression)(struct counters *, int);
188
189static const char *threshold=NULL;
190static const char *command;
191
/*
 * Description of one named entry: its strings plus a function that
 * takes a counter list and a position.
 */
struct cpu_entry {
	const char *name;		/* Entry name */
	const char *thresh;		/* Threshold text */
	const char *command;		/* Command string for this entry */
	int (*func)(struct counters *, int);	/* Function for this entry */
	int counters_required;		/* Number of counters required */
};
199
/*
 * Description of a CPU type: a name, a table of cpu_entry records and
 * a routine that explains an entry by name.
 */
struct cpu_type {
	char cputype[32];			/* CPU type name */
	int number;				/* presumably the entry count in ents -- TODO confirm */
	struct cpu_entry *ents;			/* Entry table */
	void (*explain)(const char *name);	/* Explain routine */
};

/* The single cpu_type instance, declared and then defined. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
208
/*
 * Print, for the named test, the formula to examine followed by a
 * sentence giving the threshold at which the value becomes interesting.
 *
 * Names that are not in the table produce "Unknown name:<name>" and
 * the closing sentence is printed with "unknown entry" as the
 * threshold.  All output goes to stdout.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* Test name to match */
		const char *formula;	/* Text printed for the test */
		const char *thresh;	/* Threshold wording */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this formula. */
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
287
/*
 * Print, for the named test, the formula to examine followed by a
 * sentence giving the threshold at which the value becomes interesting.
 *
 * Names that are not in the table produce "Unknown name:<name>" and
 * the closing sentence is printed with "unknown entry" as the
 * threshold.  All output goes to stdout.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* Test name to match */
		const char *formula;	/* Text printed for the test */
		const char *thresh;	/* Threshold wording */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
369
370
/*
 * Print, for the named test, the formula to examine followed by a
 * sentence giving the threshold at which the value becomes interesting.
 *
 * Names that are not in the table produce "Unknown name:<name>" and
 * the closing sentence is printed with "unknown entry" as the
 * threshold.  All output goes to stdout.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* Test name to match */
		const char *formula;	/* Text printed for the test */
		const char *thresh;	/* Threshold wording */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Names are unique in the table, so the first hit is the only hit. */
	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < nnotes) {
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
448
449
450
451static struct counters *
452find_counter(struct counters *base, const char *name)
453{
454	struct counters *at;
455	int len;
456
457	at = base;
458	len = strlen(name);
459	while(at) {
460		if (strncmp(at->counter_name, name, len) == 0) {
461			return(at);
462		}
463		at = at->next_cpu;
464	}
465	printf("Can't find counter %s\n", name);
466	printf("We have:\n");
467	at = base;
468	while(at) {
469		printf("- %s\n", at->counter_name);
470		at = at->next_cpu;
471	}
472	exit(-1);
473}
474
475static int
476allocstall1(struct counters *cpu, int pos)
477{
478/*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
479	int ret;
480	struct counters *partial;
481	struct counters *unhalt;
482	double un, par, res;
483	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
484	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
485	if (pos != -1) {
486		par = partial->vals[pos] * 1.0;
487		un = unhalt->vals[pos] * 1.0;
488	} else {
489		par = partial->sum * 1.0;
490		un = unhalt->sum * 1.0;
491	}
492	res = par/un;
493	ret = printf("%1.3f", res);
494	return(ret);
495}
496
497static int
498allocstall2(struct counters *cpu, int pos)
499{
500/*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
501	int ret;
502	struct counters *partial;
503	struct counters *unhalt;
504	double un, par, res;
505	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
506	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
507	if (pos != -1) {
508		par = partial->vals[pos] * 1.0;
509		un = unhalt->vals[pos] * 1.0;
510	} else {
511		par = partial->sum * 1.0;
512		un = unhalt->sum * 1.0;
513	}
514	res = par/un;
515	ret = printf("%1.3f", res);
516	return(ret);
517}
518
519static int
520br_mispredict(struct counters *cpu, int pos)
521{
522	struct counters *brctr;
523	struct counters *unhalt;
524	int ret;
525/*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
526	double br, un, con, res;
527	con = 20.0;
528
529	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
530        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
531	if (pos != -1) {
532		br = brctr->vals[pos] * 1.0;
533		un = unhalt->vals[pos] * 1.0;
534	} else {
535		br = brctr->sum * 1.0;
536		un = unhalt->sum * 1.0;
537	}
538	res = (con * br)/un;
539 	ret = printf("%1.3f", res);
540	return(ret);
541}
542
543static int
544br_mispredictib(struct counters *cpu, int pos)
545{
546	struct counters *brctr;
547	struct counters *unhalt;
548	struct counters *clear, *clear2, *clear3;
549	struct counters *uops;
550	struct counters *recv;
551	struct counters *iss;
552/*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
553	int ret;
554        /*
555	 * (BR_MISP_RETIRED.ALL_BRANCHES /
556	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
557	 *          MACHINE_CLEAR.COUNT) *
558	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
559	 *
560	 */
561	double br, cl, cl2, cl3, uo, re, un, con, res, is;
562	con = 4.0;
563
564	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
565        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
566	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
567	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
568	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
569	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
570	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
571	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
572	if (pos != -1) {
573		br = brctr->vals[pos] * 1.0;
574		cl = clear->vals[pos] * 1.0;
575		cl2 = clear2->vals[pos] * 1.0;
576		cl3 = clear3->vals[pos] * 1.0;
577		uo = uops->vals[pos] * 1.0;
578		re = recv->vals[pos] * 1.0;
579		is = iss->vals[pos] * 1.0;
580		un = unhalt->vals[pos] * 1.0;
581	} else {
582		br = brctr->sum * 1.0;
583		cl = clear->sum * 1.0;
584		cl2 = clear2->sum * 1.0;
585		cl3 = clear3->sum * 1.0;
586		uo = uops->sum * 1.0;
587		re = recv->sum * 1.0;
588		is = iss->sum * 1.0;
589		un = unhalt->sum * 1.0;
590	}
591	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
592 	ret = printf("%1.3f", res);
593	return(ret);
594}
595
596
597static int
598br_mispredict_broad(struct counters *cpu, int pos)
599{
600	struct counters *brctr;
601	struct counters *unhalt;
602	struct counters *clear;
603	struct counters *uops;
604	struct counters *uops_ret;
605	struct counters *recv;
606	int ret;
607	double br, cl, uo, uo_r, re, con, un, res;
608
609	con = 4.0;
610
611	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
612        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
613	clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
614	uops = find_counter(cpu, "UOPS_ISSUED.ANY");
615	uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
616	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
617
618	if (pos != -1) {
619		un = unhalt->vals[pos] * 1.0;
620		br = brctr->vals[pos] * 1.0;
621		cl = clear->vals[pos] * 1.0;
622		uo = uops->vals[pos] * 1.0;
623		uo_r = uops_ret->vals[pos] * 1.0;
624		re = recv->vals[pos] * 1.0;
625	} else {
626		un = unhalt->sum * 1.0;
627		br = brctr->sum * 1.0;
628		cl = clear->sum * 1.0;
629		uo = uops->sum * 1.0;
630		uo_r = uops_ret->sum * 1.0;
631		re = recv->sum * 1.0;
632	}
633	res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
634 	ret = printf("%1.3f", res);
635	return(ret);
636}
637
638static int
639splitloadib(struct counters *cpu, int pos)
640{
641	int ret;
642	struct counters *mem;
643	struct counters *l1d, *ldblock;
644	struct counters *unhalt;
645	double un, memd, res, l1, ldb;
646        /*
647	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
648	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
649	 */
650
651	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
652	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
653	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
654	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
655	if (pos != -1) {
656		memd = mem->vals[pos] * 1.0;
657		l1 = l1d->vals[pos] * 1.0;
658		ldb = ldblock->vals[pos] * 1.0;
659		un = unhalt->vals[pos] * 1.0;
660	} else {
661		memd = mem->sum * 1.0;
662		l1 = l1d->sum * 1.0;
663		ldb = ldblock->sum * 1.0;
664		un = unhalt->sum * 1.0;
665	}
666	res = ((l1 / memd) * ldb)/un;
667	ret = printf("%1.3f", res);
668	return(ret);
669}
670
671
672static int
673splitload(struct counters *cpu, int pos)
674{
675	int ret;
676	struct counters *mem;
677	struct counters *unhalt;
678	double con, un, memd, res;
679/*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
680
681	con = 5.0;
682	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683	mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
684	if (pos != -1) {
685		memd = mem->vals[pos] * 1.0;
686		un = unhalt->vals[pos] * 1.0;
687	} else {
688		memd = mem->sum * 1.0;
689		un = unhalt->sum * 1.0;
690	}
691	res = (memd * con)/un;
692	ret = printf("%1.3f", res);
693	return(ret);
694}
695
696
697static int
698splitload_sb(struct counters *cpu, int pos)
699{
700	int ret;
701	struct counters *mem;
702	struct counters *unhalt;
703	double con, un, memd, res;
704/*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
705
706	con = 5.0;
707	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
708	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
709	if (pos != -1) {
710		memd = mem->vals[pos] * 1.0;
711		un = unhalt->vals[pos] * 1.0;
712	} else {
713		memd = mem->sum * 1.0;
714		un = unhalt->sum * 1.0;
715	}
716	res = (memd * con)/un;
717	ret = printf("%1.3f", res);
718	return(ret);
719}
720
721
722static int
723splitstore_sb(struct counters *cpu, int pos)
724{
725        /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
726	int ret;
727	struct counters *mem_split;
728	struct counters *mem_stores;
729	double memsplit, memstore, res;
730	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
731	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
732	if (pos != -1) {
733		memsplit = mem_split->vals[pos] * 1.0;
734		memstore = mem_stores->vals[pos] * 1.0;
735	} else {
736		memsplit = mem_split->sum * 1.0;
737		memstore = mem_stores->sum * 1.0;
738	}
739	res = memsplit/memstore;
740	ret = printf("%1.3f", res);
741	return(ret);
742}
743
744
745
746static int
747splitstore(struct counters *cpu, int pos)
748{
749        /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
750	int ret;
751	struct counters *mem_split;
752	struct counters *mem_stores;
753	double memsplit, memstore, res;
754	mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
755	mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
756	if (pos != -1) {
757		memsplit = mem_split->vals[pos] * 1.0;
758		memstore = mem_stores->vals[pos] * 1.0;
759	} else {
760		memsplit = mem_split->sum * 1.0;
761		memstore = mem_stores->sum * 1.0;
762	}
763	res = memsplit/memstore;
764	ret = printf("%1.3f", res);
765	return(ret);
766}
767
768
769static int
770contested(struct counters *cpu, int pos)
771{
772        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
773	int ret;
774	struct counters *mem;
775	struct counters *unhalt;
776	double con, un, memd, res;
777
778	con = 60.0;
779	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
780	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
781	if (pos != -1) {
782		memd = mem->vals[pos] * 1.0;
783		un = unhalt->vals[pos] * 1.0;
784	} else {
785		memd = mem->sum * 1.0;
786		un = unhalt->sum * 1.0;
787	}
788	res = (memd * con)/un;
789	ret = printf("%1.3f", res);
790	return(ret);
791}
792
793static int
794contested_has(struct counters *cpu, int pos)
795{
796        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
797	int ret;
798	struct counters *mem;
799	struct counters *unhalt;
800	double con, un, memd, res;
801
802	con = 84.0;
803	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
804	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
805	if (pos != -1) {
806		memd = mem->vals[pos] * 1.0;
807		un = unhalt->vals[pos] * 1.0;
808	} else {
809		memd = mem->sum * 1.0;
810		un = unhalt->sum * 1.0;
811	}
812	res = (memd * con)/un;
813	ret = printf("%1.3f", res);
814	return(ret);
815}
816
817static int
818contestedbroad(struct counters *cpu, int pos)
819{
820        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
821	int ret;
822	struct counters *mem;
823	struct counters *mem2;
824	struct counters *unhalt;
825	double con, un, memd, memtoo, res;
826
827	con = 84.0;
828	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
829	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
830	mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
831
832	if (pos != -1) {
833		memd = mem->vals[pos] * 1.0;
834		memtoo = mem2->vals[pos] * 1.0;
835		un = unhalt->vals[pos] * 1.0;
836	} else {
837		memd = mem->sum * 1.0;
838		memtoo = mem2->sum * 1.0;
839		un = unhalt->sum * 1.0;
840	}
841	res = ((memd * con) + memtoo)/un;
842	ret = printf("%1.3f", res);
843	return(ret);
844}
845
846
847static int
848blockstoreforward(struct counters *cpu, int pos)
849{
850        /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
851	int ret;
852	struct counters *ldb;
853	struct counters *unhalt;
854	double con, un, ld, res;
855
856	con = 13.0;
857	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
858	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
859	if (pos != -1) {
860		ld = ldb->vals[pos] * 1.0;
861		un = unhalt->vals[pos] * 1.0;
862	} else {
863		ld = ldb->sum * 1.0;
864		un = unhalt->sum * 1.0;
865	}
866	res = (ld * con)/un;
867	ret = printf("%1.3f", res);
868	return(ret);
869}
870
871static int
872cache2(struct counters *cpu, int pos)
873{
874	/* ** Suspect ***
875	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
876	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
877	 */
878	int ret;
879	struct counters *mem1, *mem2, *mem3;
880	struct counters *unhalt;
881	double con1, con2, con3, un, me_1, me_2, me_3, res;
882
883	con1 = 26.0;
884	con2 = 43.0;
885	con3 = 60.0;
886	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
887/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
888	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
889	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
890	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
891	if (pos != -1) {
892		me_1 = mem1->vals[pos] * 1.0;
893		me_2 = mem2->vals[pos] * 1.0;
894		me_3 = mem3->vals[pos] * 1.0;
895		un = unhalt->vals[pos] * 1.0;
896	} else {
897		me_1 = mem1->sum * 1.0;
898		me_2 = mem2->sum * 1.0;
899		me_3 = mem3->sum * 1.0;
900		un = unhalt->sum * 1.0;
901	}
902	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
903	ret = printf("%1.3f", res);
904	return(ret);
905}
906
907static int
908datasharing(struct counters *cpu, int pos)
909{
910	/*
911	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
912	 */
913	int ret;
914	struct counters *mem;
915	struct counters *unhalt;
916	double con, res, me, un;
917
918	con = 43.0;
919	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
920	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
921	if (pos != -1) {
922		me = mem->vals[pos] * 1.0;
923		un = unhalt->vals[pos] * 1.0;
924	} else {
925		me = mem->sum * 1.0;
926		un = unhalt->sum * 1.0;
927	}
928	res = (me * con)/un;
929	ret = printf("%1.3f", res);
930	return(ret);
931
932}
933
934
935static int
936datasharing_has(struct counters *cpu, int pos)
937{
938	/*
939	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
940	 */
941	int ret;
942	struct counters *mem;
943	struct counters *unhalt;
944	double con, res, me, un;
945
946	con = 72.0;
947	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
948	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
949	if (pos != -1) {
950		me = mem->vals[pos] * 1.0;
951		un = unhalt->vals[pos] * 1.0;
952	} else {
953		me = mem->sum * 1.0;
954		un = unhalt->sum * 1.0;
955	}
956	res = (me * con)/un;
957	ret = printf("%1.3f", res);
958	return(ret);
959
960}
961
962
963static int
964cache2ib(struct counters *cpu, int pos)
965{
966        /*
967	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
968	 */
969	int ret;
970	struct counters *mem;
971	struct counters *unhalt;
972	double con, un, me, res;
973
974	con = 29.0;
975	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
976	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
977	if (pos != -1) {
978		me = mem->vals[pos] * 1.0;
979		un = unhalt->vals[pos] * 1.0;
980	} else {
981		me = mem->sum * 1.0;
982		un = unhalt->sum * 1.0;
983	}
984	res = (con * me)/un;
985	ret = printf("%1.3f", res);
986	return(ret);
987}
988
989static int
990cache2has(struct counters *cpu, int pos)
991{
992	/*
993	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
994	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
995	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
996	 *           / CPU_CLK_UNHALTED.THREAD_P
997	 */
998	int ret;
999	struct counters *mem1, *mem2, *mem3;
1000	struct counters *unhalt;
1001	double con1, con2, con3, un, me1, me2, me3, res;
1002
1003	con1 = 36.0;
1004	con2 = 72.0;
1005	con3 = 84.0;
1006	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1007	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1008	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1009	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1010	if (pos != -1) {
1011		me1 = mem1->vals[pos] * 1.0;
1012		me2 = mem2->vals[pos] * 1.0;
1013		me3 = mem3->vals[pos] * 1.0;
1014		un = unhalt->vals[pos] * 1.0;
1015	} else {
1016		me1 = mem1->sum * 1.0;
1017		me2 = mem2->sum * 1.0;
1018		me3 = mem3->sum * 1.0;
1019		un = unhalt->sum * 1.0;
1020	}
1021	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1022	ret = printf("%1.3f", res);
1023	return(ret);
1024}
1025
1026
1027static int
1028cache2broad(struct counters *cpu, int pos)
1029{
1030        /*
1031	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1032	 */
1033	int ret;
1034	struct counters *mem;
1035	struct counters *unhalt;
1036	double con, un, me, res;
1037
1038	con = 36.0;
1039	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1041	if (pos != -1) {
1042		me = mem->vals[pos] * 1.0;
1043		un = unhalt->vals[pos] * 1.0;
1044	} else {
1045		me = mem->sum * 1.0;
1046		un = unhalt->sum * 1.0;
1047	}
1048	res = (con * me)/un;
1049	ret = printf("%1.3f", res);
1050	return(ret);
1051}
1052
1053
1054static int
1055cache1(struct counters *cpu, int pos)
1056{
1057	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1058	int ret;
1059	struct counters *mem;
1060	struct counters *unhalt;
1061	double con, un, me, res;
1062
1063	con = 180.0;
1064	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1065	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1066	if (pos != -1) {
1067		me = mem->vals[pos] * 1.0;
1068		un = unhalt->vals[pos] * 1.0;
1069	} else {
1070		me = mem->sum * 1.0;
1071		un = unhalt->sum * 1.0;
1072	}
1073	res = (me * con)/un;
1074	ret = printf("%1.3f", res);
1075	return(ret);
1076}
1077
1078static int
1079cache1ib(struct counters *cpu, int pos)
1080{
1081	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1082	int ret;
1083	struct counters *mem;
1084	struct counters *unhalt;
1085	double con, un, me, res;
1086
1087	con = 180.0;
1088	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1089	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1090	if (pos != -1) {
1091		me = mem->vals[pos] * 1.0;
1092		un = unhalt->vals[pos] * 1.0;
1093	} else {
1094		me = mem->sum * 1.0;
1095		un = unhalt->sum * 1.0;
1096	}
1097	res = (me * con)/un;
1098	ret = printf("%1.3f", res);
1099	return(ret);
1100}
1101
1102
1103static int
1104cache1broad(struct counters *cpu, int pos)
1105{
1106	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1107	int ret;
1108	struct counters *mem;
1109	struct counters *unhalt;
1110	double con, un, me, res;
1111
1112	con = 180.0;
1113	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1114	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1115	if (pos != -1) {
1116		me = mem->vals[pos] * 1.0;
1117		un = unhalt->vals[pos] * 1.0;
1118	} else {
1119		me = mem->sum * 1.0;
1120		un = unhalt->sum * 1.0;
1121	}
1122	res = (me * con)/un;
1123	ret = printf("%1.3f", res);
1124	return(ret);
1125}
1126
1127
1128static int
1129dtlb_missload(struct counters *cpu, int pos)
1130{
1131	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1132	int ret;
1133	struct counters *dtlb_m, *dtlb_d;
1134	struct counters *unhalt;
1135	double con, un, d1, d2, res;
1136
1137	con = 7.0;
1138	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1139	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1140	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1141	if (pos != -1) {
1142		d1 = dtlb_m->vals[pos] * 1.0;
1143		d2 = dtlb_d->vals[pos] * 1.0;
1144		un = unhalt->vals[pos] * 1.0;
1145	} else {
1146		d1 = dtlb_m->sum * 1.0;
1147		d2 = dtlb_d->sum * 1.0;
1148		un = unhalt->sum * 1.0;
1149	}
1150	res = ((d1 * con) + d2)/un;
1151	ret = printf("%1.3f", res);
1152	return(ret);
1153}
1154
1155static int
1156dtlb_missstore(struct counters *cpu, int pos)
1157{
1158        /*
1159	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1160	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1161	 */
1162        int ret;
1163        struct counters *dtsb_m, *dtsb_d;
1164        struct counters *unhalt;
1165        double con, un, d1, d2, res;
1166
1167        con = 7.0;
1168        unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1169        dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1170        dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1171        if (pos != -1) {
1172                d1 = dtsb_m->vals[pos] * 1.0;
1173                d2 = dtsb_d->vals[pos] * 1.0;
1174                un = unhalt->vals[pos] * 1.0;
1175        } else {
1176                d1 = dtsb_m->sum * 1.0;
1177                d2 = dtsb_d->sum * 1.0;
1178                un = unhalt->sum * 1.0;
1179        }
1180        res = ((d1 * con) + d2)/un;
1181        ret = printf("%1.3f", res);
1182        return(ret);
1183}
1184
1185static int
1186itlb_miss(struct counters *cpu, int pos)
1187{
1188	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1189	int ret;
1190	struct counters *itlb;
1191	struct counters *unhalt;
1192	double un, d1, res;
1193
1194	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1195	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1196	if (pos != -1) {
1197		d1 = itlb->vals[pos] * 1.0;
1198		un = unhalt->vals[pos] * 1.0;
1199	} else {
1200		d1 = itlb->sum * 1.0;
1201		un = unhalt->sum * 1.0;
1202	}
1203	res = d1/un;
1204	ret = printf("%1.3f", res);
1205	return(ret);
1206}
1207
1208
1209static int
1210itlb_miss_broad(struct counters *cpu, int pos)
1211{
1212	/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1213	int ret;
1214	struct counters *itlb;
1215	struct counters *unhalt;
1216	struct counters *four_k;
1217	double un, d1, res, k;
1218
1219	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1220	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1221	four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1222	if (pos != -1) {
1223		d1 = itlb->vals[pos] * 1.0;
1224		un = unhalt->vals[pos] * 1.0;
1225		k = four_k->vals[pos] * 1.0;
1226	} else {
1227		d1 = itlb->sum * 1.0;
1228		un = unhalt->sum * 1.0;
1229		k = four_k->sum * 1.0;
1230	}
1231	res = (7.0 * k + d1)/un;
1232	ret = printf("%1.3f", res);
1233	return(ret);
1234}
1235
1236
1237static int
1238icache_miss(struct counters *cpu, int pos)
1239{
1240	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1241
1242	int ret;
1243	struct counters *itlb, *icache;
1244	struct counters *unhalt;
1245	double un, d1, ic, res;
1246
1247	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1249	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1250	if (pos != -1) {
1251		d1 = itlb->vals[pos] * 1.0;
1252		ic = icache->vals[pos] * 1.0;
1253		un = unhalt->vals[pos] * 1.0;
1254	} else {
1255		d1 = itlb->sum * 1.0;
1256		ic = icache->sum * 1.0;
1257		un = unhalt->sum * 1.0;
1258	}
1259	res = (ic-d1)/un;
1260	ret = printf("%1.3f", res);
1261	return(ret);
1262
1263}
1264
1265static int
1266icache_miss_has(struct counters *cpu, int pos)
1267{
1268	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1269
1270	int ret;
1271	struct counters *icache;
1272	struct counters *unhalt;
1273	double un, con, ic, res;
1274
1275	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1276	icache = find_counter(cpu, "ICACHE.MISSES");
1277	con = 36.0;
1278	if (pos != -1) {
1279		ic = icache->vals[pos] * 1.0;
1280		un = unhalt->vals[pos] * 1.0;
1281	} else {
1282		ic = icache->sum * 1.0;
1283		un = unhalt->sum * 1.0;
1284	}
1285	res = (con * ic)/un;
1286	ret = printf("%1.3f", res);
1287	return(ret);
1288
1289}
1290
1291static int
1292lcp_stall(struct counters *cpu, int pos)
1293{
1294         /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1295	int ret;
1296	struct counters *ild;
1297	struct counters *unhalt;
1298	double un, d1, res;
1299
1300	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1301	ild = find_counter(cpu, "ILD_STALL.LCP");
1302	if (pos != -1) {
1303		d1 = ild->vals[pos] * 1.0;
1304		un = unhalt->vals[pos] * 1.0;
1305	} else {
1306		d1 = ild->sum * 1.0;
1307		un = unhalt->sum * 1.0;
1308	}
1309	res = d1/un;
1310	ret = printf("%1.3f", res);
1311	return(ret);
1312
1313}
1314
1315
1316static int
1317frontendstall(struct counters *cpu, int pos)
1318{
1319      /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1320	int ret;
1321	struct counters *idq;
1322	struct counters *unhalt;
1323	double con, un, id, res;
1324
1325	con = 4.0;
1326	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1327	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1328	if (pos != -1) {
1329		id = idq->vals[pos] * 1.0;
1330		un = unhalt->vals[pos] * 1.0;
1331	} else {
1332		id = idq->sum * 1.0;
1333		un = unhalt->sum * 1.0;
1334	}
1335	res = id/(un * con);
1336	ret = printf("%1.3f", res);
1337	return(ret);
1338}
1339
1340static int
1341clears(struct counters *cpu, int pos)
1342{
1343	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1344	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1345
1346	int ret;
1347	struct counters *clr1, *clr2, *clr3;
1348	struct counters *unhalt;
1349	double con, un, cl1, cl2, cl3, res;
1350
1351	con = 100.0;
1352	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1353	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1354	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1355	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1356
1357	if (pos != -1) {
1358		cl1 = clr1->vals[pos] * 1.0;
1359		cl2 = clr2->vals[pos] * 1.0;
1360		cl3 = clr3->vals[pos] * 1.0;
1361		un = unhalt->vals[pos] * 1.0;
1362	} else {
1363		cl1 = clr1->sum * 1.0;
1364		cl2 = clr2->sum * 1.0;
1365		cl3 = clr3->sum * 1.0;
1366		un = unhalt->sum * 1.0;
1367	}
1368	res = ((cl1 + cl2 + cl3) * con)/un;
1369	ret = printf("%1.3f", res);
1370	return(ret);
1371}
1372
1373
1374
1375static int
1376clears_broad(struct counters *cpu, int pos)
1377{
1378	int ret;
1379	struct counters *clr1, *clr2, *clr3, *cyc;
1380	struct counters *unhalt;
1381	double con, un, cl1, cl2, cl3, cy, res;
1382
1383	con = 100.0;
1384	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1385	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1386	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1387	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1388	cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1389	if (pos != -1) {
1390		cl1 = clr1->vals[pos] * 1.0;
1391		cl2 = clr2->vals[pos] * 1.0;
1392		cl3 = clr3->vals[pos] * 1.0;
1393		cy = cyc->vals[pos] * 1.0;
1394		un = unhalt->vals[pos] * 1.0;
1395	} else {
1396		cl1 = clr1->sum * 1.0;
1397		cl2 = clr2->sum * 1.0;
1398		cl3 = clr3->sum * 1.0;
1399		cy = cyc->sum * 1.0;
1400		un = unhalt->sum * 1.0;
1401	}
1402	/* Formula not listed but extrapulated to add the cy ?? */
1403	res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1404	ret = printf("%1.3f", res);
1405	return(ret);
1406}
1407
1408
1409
1410
1411
1412static int
1413microassist(struct counters *cpu, int pos)
1414{
1415	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1416	int ret;
1417	struct counters *idq;
1418	struct counters *unhalt;
1419	double un, id, res, con;
1420
1421	con = 4.0;
1422	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1423	idq = find_counter(cpu, "IDQ.MS_UOPS");
1424	if (pos != -1) {
1425		id = idq->vals[pos] * 1.0;
1426		un = unhalt->vals[pos] * 1.0;
1427	} else {
1428		id = idq->sum * 1.0;
1429		un = unhalt->sum * 1.0;
1430	}
1431	res = id/(un * con);
1432	ret = printf("%1.3f", res);
1433	return(ret);
1434}
1435
1436
1437static int
1438microassist_broad(struct counters *cpu, int pos)
1439{
1440	int ret;
1441	struct counters *idq;
1442	struct counters *unhalt;
1443	struct counters *uopiss;
1444	struct counters *uopret;
1445	double un, id, res, con, uoi, uor;
1446
1447	con = 4.0;
1448	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1449	idq = find_counter(cpu, "IDQ.MS_UOPS");
1450	uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1451	uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1452	if (pos != -1) {
1453		id = idq->vals[pos] * 1.0;
1454		un = unhalt->vals[pos] * 1.0;
1455		uoi = uopiss->vals[pos] * 1.0;
1456		uor = uopret->vals[pos] * 1.0;
1457	} else {
1458		id = idq->sum * 1.0;
1459		un = unhalt->sum * 1.0;
1460		uoi = uopiss->sum * 1.0;
1461		uor = uopret->sum * 1.0;
1462	}
1463	res = (uor/uoi) * (id/(un * con));
1464	ret = printf("%1.3f", res);
1465	return(ret);
1466}
1467
1468
1469static int
1470aliasing(struct counters *cpu, int pos)
1471{
1472	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1473	int ret;
1474	struct counters *ld;
1475	struct counters *unhalt;
1476	double un, lds, con, res;
1477
1478	con = 5.0;
1479	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1480	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1481	if (pos != -1) {
1482		lds = ld->vals[pos] * 1.0;
1483		un = unhalt->vals[pos] * 1.0;
1484	} else {
1485		lds = ld->sum * 1.0;
1486		un = unhalt->sum * 1.0;
1487	}
1488	res = (lds * con)/un;
1489	ret = printf("%1.3f", res);
1490	return(ret);
1491}
1492
1493static int
1494aliasing_broad(struct counters *cpu, int pos)
1495{
1496	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1497	int ret;
1498	struct counters *ld;
1499	struct counters *unhalt;
1500	double un, lds, con, res;
1501
1502	con = 7.0;
1503	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1504	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1505	if (pos != -1) {
1506		lds = ld->vals[pos] * 1.0;
1507		un = unhalt->vals[pos] * 1.0;
1508	} else {
1509		lds = ld->sum * 1.0;
1510		un = unhalt->sum * 1.0;
1511	}
1512	res = (lds * con)/un;
1513	ret = printf("%1.3f", res);
1514	return(ret);
1515}
1516
1517
1518static int
1519fpassists(struct counters *cpu, int pos)
1520{
1521	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1522	int ret;
1523	struct counters *fp;
1524	struct counters *inst;
1525	double un, fpd, res;
1526
1527	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1528	fp = find_counter(cpu, "FP_ASSIST.ANY");
1529	if (pos != -1) {
1530		fpd = fp->vals[pos] * 1.0;
1531		un = inst->vals[pos] * 1.0;
1532	} else {
1533		fpd = fp->sum * 1.0;
1534		un = inst->sum * 1.0;
1535	}
1536	res = fpd/un;
1537	ret = printf("%1.3f", res);
1538	return(ret);
1539}
1540
1541static int
1542otherassistavx(struct counters *cpu, int pos)
1543{
1544	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1545	int ret;
1546	struct counters *oth;
1547	struct counters *unhalt;
1548	double un, ot, con, res;
1549
1550	con = 75.0;
1551	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1552	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1553	if (pos != -1) {
1554		ot = oth->vals[pos] * 1.0;
1555		un = unhalt->vals[pos] * 1.0;
1556	} else {
1557		ot = oth->sum * 1.0;
1558		un = unhalt->sum * 1.0;
1559	}
1560	res = (ot * con)/un;
1561	ret = printf("%1.3f", res);
1562	return(ret);
1563}
1564
1565static int
1566otherassistsse(struct counters *cpu, int pos)
1567{
1568
1569	int ret;
1570	struct counters *oth;
1571	struct counters *unhalt;
1572	double un, ot, con, res;
1573
1574	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1575	con = 75.0;
1576	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1577	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1578	if (pos != -1) {
1579		ot = oth->vals[pos] * 1.0;
1580		un = unhalt->vals[pos] * 1.0;
1581	} else {
1582		ot = oth->sum * 1.0;
1583		un = unhalt->sum * 1.0;
1584	}
1585	res = (ot * con)/un;
1586	ret = printf("%1.3f", res);
1587	return(ret);
1588}
1589
1590static int
1591efficiency1(struct counters *cpu, int pos)
1592{
1593
1594	int ret;
1595	struct counters *uops;
1596	struct counters *unhalt;
1597	double un, ot, con, res;
1598
1599        /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1600	con = 4.0;
1601	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1602	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1603	if (pos != -1) {
1604		ot = uops->vals[pos] * 1.0;
1605		un = unhalt->vals[pos] * 1.0;
1606	} else {
1607		ot = uops->sum * 1.0;
1608		un = unhalt->sum * 1.0;
1609	}
1610	res = ot/(con * un);
1611	ret = printf("%1.3f", res);
1612	return(ret);
1613}
1614
1615static int
1616efficiency2(struct counters *cpu, int pos)
1617{
1618
1619	int ret;
1620	struct counters *uops;
1621	struct counters *unhalt;
1622	double un, ot, res;
1623
1624        /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1625	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1626	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1627	if (pos != -1) {
1628		ot = uops->vals[pos] * 1.0;
1629		un = unhalt->vals[pos] * 1.0;
1630	} else {
1631		ot = uops->sum * 1.0;
1632		un = unhalt->sum * 1.0;
1633	}
1634	res = un/ot;
1635	ret = printf("%1.3f", res);
1636	return(ret);
1637}
1638
/*
 * Expression table for Sandy Bridge.
 *
 * Each initializer supplies, in order: a short expression name, a
 * human-readable threshold hint, the pmcstat command line that collects
 * the events the expression needs, the function that prints the
 * expression, and an integer which in every entry below equals the number
 * of "-s" events in that command line.
 * NOTE(review): struct cpu_entry is declared outside this view; confirm
 * the field names and the exact meaning of the trailing integer there.
 *
 * SANDY_BRIDGE_COUNT is the declared array size and must be kept equal to
 * the number of initializers (currently 20).
 */
#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/	{ "allocstall1", "thresh > .05",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
	  allocstall1, 2 },
/* -- not defined for SB right (partial-rat_stalls) 02*/
        { "allocstall2", "thresh > .05",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
	  allocstall2, 2 },
/*03*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	  br_mispredict, 2 },
/*04*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
	  splitload_sb, 2 },
/*05*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	  splitstore_sb, 2 },
/*06*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested, 2 },
/*07*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*08*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2, 4 },
/*09*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1, 2 },
/*10*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload, 3 },
/*11*/	{ "dtlbmissstore", "thresh >= .05",
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore, 3 },
/*12*/	{ "frontendstall", "thresh >= .15",
	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  frontendstall, 2 },
/*13*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears, 4 },
/*14*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist, 2 },
/*15*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing, 2 },
/*16*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*17*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx, 2},
/*18*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse, 2 },
/*19*/	{ "eff1", "thresh < .9",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*20*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
};
1703
1704
/*
 * Expression table for Ivy Bridge.
 *
 * Each initializer supplies, in order: a short expression name, a
 * human-readable threshold hint, the pmcstat command line that collects
 * the events the expression needs, the function that prints the
 * expression, and an integer which in every entry below equals the number
 * of "-s" events in that command line.
 * NOTE(review): struct cpu_entry is declared outside this view; confirm
 * the field names and the exact meaning of the trailing integer there.
 *
 * IVY_BRIDGE_COUNT is the declared array size and must be kept equal to
 * the number of initializers (currently 21).
 */
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/	{ "eff1", "thresh < .75",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*2*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
/*3*/	{ "itlbmiss", "thresh > .05",
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  itlb_miss, 2 },
/*4*/	{ "icachemiss", "thresh > .05",
	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss, 3 },
/*5*/	{ "lcpstall", "thresh > .05",
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall, 2 },
/*6*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1ib, 2 },
/*7*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2ib, 2 },
/*8*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested, 2 },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing, 2 },
/*10*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*11*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
	  splitloadib, 4 },
/*12*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
	  splitstore, 2 },
/*13*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing, 2 },
/*14*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload , 3},
/*15*/	{ "dtlbmissstore", "thresh >= .05",
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore, 3 },
/*16*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	  br_mispredictib, 8 },
/*17*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears, 4 },
/*18*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist, 2 },
/*19*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*20*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx , 2},
/*21*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse, 2 },
};
1771
/*
 * Expression table for Haswell.
 *
 * Each initializer supplies, in order: a short expression name, a
 * human-readable threshold hint, the pmcstat command line that collects
 * the events the expression needs, the function that prints the
 * expression, and an integer which in every entry below equals the number
 * of "-s" events in that command line.
 * NOTE(review): struct cpu_entry is declared outside this view; confirm
 * the field names and the exact meaning of the trailing integer there.
 * NOTE(review): unlike ivy_bridge there is no "dtlbmissstore" entry in
 * this table -- confirm that the omission is intentional.
 *
 * HASWELL_COUNT is the declared array size and must be kept equal to the
 * number of initializers (currently 20).
 */
#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
/*1*/	{ "eff1", "thresh < .75",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1, 2 },
/*2*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2, 2 },
/*3*/	{ "itlbmiss", "thresh > .05",
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  itlb_miss, 2 },
/*4*/	{ "icachemiss", "thresh > .05",
	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss_has, 2 },
/*5*/	{ "lcpstall", "thresh > .05",
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall, 2 },
/*6*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1ib, 2 },
/*7*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2has, 4 },
/*8*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested_has, 2 },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing_has, 2 },
/*10*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward, 2 },
/*11*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
	  splitload , 2},
/*12*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
	  splitstore, 2 },
/*13*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing, 2 },
/*14*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload, 3 },
/*15*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	  br_mispredict, 2 },
/*16*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears, 4 },
/*17*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist, 2 },
/*18*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists, 2 },
/*19*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx, 2 },
/*20*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse, 2 },
};
1835
1836
/*
 * Print the formula behind the canned Broadwell test called 'name'
 * followed by the threshold at which the result becomes interesting.
 *
 * name -- test name as given to -e (e.g. "eff1", "br_miss").
 *
 * An unrecognised name prints "Unknown name:<name>" and the closing
 * sentence is emitted with "unknown entry" as its threshold.
 */
static void
explain_name_broad(const char *name)
{
	/* One row per test: the text to print and its threshold. */
	static const struct {
		const char *name;
		const char *formula;
		const char *thresh;
	} broad_help[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		/* Event names below were misspelled in the printed text. */
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const size_t nhelp = sizeof(broad_help) / sizeof(broad_help[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nhelp; i++) {
		if (strcmp(name, broad_help[i].name) == 0)
			break;
	}
	if (i < nhelp) {
		/* The formulas hold no conversions; print them verbatim. */
		printf("%s", broad_help[i].formula);
		mythresh = broad_help[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1905
1906
1907#define BROADWELL_COUNT 17
1908static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1909/*1*/	{ "eff1", "thresh < .75",
1910	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911	  efficiency1, 2 },
1912/*2*/	{ "eff2", "thresh > 1.0",
1913	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1914	  efficiency2, 2 },
1915/*3*/	{ "itlbmiss", "thresh > .05",
1916	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1917	  itlb_miss_broad, 3 },
1918/*4*/	{ "icachemiss", "thresh > .05",
1919	  "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920	  icache_miss_has, 2 },
1921/*5*/	{ "lcpstall", "thresh > .05",
1922	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923	  lcp_stall, 2 },
1924/*6*/	{ "cache1", "thresh >= .1",
1925	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926	  cache1broad, 2 },
1927/*7*/	{ "cache2", "thresh >= .2",
1928	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1929	  cache2broad, 2 },
1930/*8*/	{ "contested", "thresh >= .05",
1931	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1932	  contestedbroad, 2 },
1933/*9*/	{ "datashare", "thresh >= .05",
1934	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935	  datasharing_has, 2 },
1936/*10*/	{ "blockstorefwd", "thresh >= .05",
1937	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938	  blockstoreforward, 2 },
1939/*11*/	{ "aliasing_4k", "thresh >= .1",
1940	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941	  aliasing_broad, 2 },
1942/*12*/	{ "dtlbmissload", "thresh >= .1",
1943	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1944	  dtlb_missload, 3 },
1945/*13*/	{ "br_miss", "thresh >= .2",
1946	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1947	  br_mispredict_broad, 7 },
1948/*14*/	{ "clears", "thresh >= .02",
1949	  "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1950	  clears_broad, 5 },
1951/*15*/	{ "fpassist", "look for a excessive value",
1952	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1953	  fpassists, 2 },
1954/*16*/	{ "otherassistavx", "look for a excessive value",
1955	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1956	  otherassistavx, 2 },
1957/*17*/	{ "microassist", "thresh >= .2",
1958	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1959	  microassist_broad, 4 },
1960};
1961
1962
1963static void
1964set_sandybridge(void)
1965{
1966	strcpy(the_cpu.cputype, "SandyBridge PMC");
1967	the_cpu.number = SANDY_BRIDGE_COUNT;
1968	the_cpu.ents = sandy_bridge;
1969	the_cpu.explain = explain_name_sb;
1970}
1971
1972static void
1973set_ivybridge(void)
1974{
1975	strcpy(the_cpu.cputype, "IvyBridge PMC");
1976	the_cpu.number = IVY_BRIDGE_COUNT;
1977	the_cpu.ents = ivy_bridge;
1978	the_cpu.explain = explain_name_ib;
1979}
1980
1981
1982static void
1983set_haswell(void)
1984{
1985	strcpy(the_cpu.cputype, "HASWELL PMC");
1986	the_cpu.number = HASWELL_COUNT;
1987	the_cpu.ents = haswell;
1988	the_cpu.explain = explain_name_has;
1989}
1990
1991
1992static void
1993set_broadwell(void)
1994{
1995	strcpy(the_cpu.cputype, "HASWELL PMC");
1996	the_cpu.number = BROADWELL_COUNT;
1997	the_cpu.ents = broadwell;
1998	the_cpu.explain = explain_name_broad;
1999}
2000
2001
2002static int
2003set_expression(const char *name)
2004{
2005	int found = 0, i;
2006	for(i=0 ; i< the_cpu.number; i++) {
2007		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2008			found = 1;
2009			expression = the_cpu.ents[i].func;
2010			command = the_cpu.ents[i].command;
2011			threshold = the_cpu.ents[i].thresh;
2012			if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2013				printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2014				       the_cpu.ents[i].name,
2015				       the_cpu.ents[i].counters_required, max_pmc_counters);
2016				printf("Sorry this test can not be run\n");
2017				if (run_all == 0) {
2018					exit(-1);
2019				} else {
2020					return(-1);
2021				}
2022			}
2023			break;
2024		}
2025	}
2026	if (!found) {
2027		printf("For CPU type %s we have no expression:%s\n",
2028		       the_cpu.cputype, name);
2029		exit(-1);
2030	}
2031	return(0);
2032}
2033
2034
2035
2036
2037
2038static int
2039validate_expression(char *name)
2040{
2041	int i, found;
2042
2043	found = 0;
2044	for(i=0 ; i< the_cpu.number; i++) {
2045		if (strcmp(name, the_cpu.ents[i].name) == 0) {
2046			found = 1;
2047			break;
2048		}
2049	}
2050	if (!found) {
2051		return(-1);
2052	}
2053	return (0);
2054}
2055
2056static void
2057do_expression(struct counters *cpu, int pos)
2058{
2059	if (expression == NULL)
2060		return;
2061	(*expression)(cpu, pos);
2062}
2063
2064static void
2065process_header(int idx, char *p)
2066{
2067	struct counters *up;
2068	int i, len, nlen;
2069	/*
2070	 * Given header element idx, at p in
2071	 * form 's/NN/nameof'
2072	 * process the entry to pull out the name and
2073	 * the CPU number.
2074	 */
2075	if (strncmp(p, "s/", 2)) {
2076		printf("Check -- invalid header no s/ in %s\n",
2077		       p);
2078		return;
2079	}
2080	up = &cnts[idx];
2081	up->cpu = strtol(&p[2], NULL, 10);
2082	len = strlen(p);
2083	for (i=2; i<len; i++) {
2084		if (p[i] == '/') {
2085			nlen = strlen(&p[(i+1)]);
2086			if (nlen < (MAX_NLEN-1)) {
2087				strcpy(up->counter_name, &p[(i+1)]);
2088			} else {
2089				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2090			}
2091		}
2092	}
2093}
2094
2095static void
2096build_counters_from_header(FILE *io)
2097{
2098	char buffer[8192], *p;
2099	int i, len, cnt;
2100	size_t mlen;
2101
2102	/* We have a new start, lets
2103	 * setup our headers and cpus.
2104	 */
2105	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2106		printf("First line can't be read from file err:%d\n", errno);
2107		return;
2108	}
2109	/*
2110	 * Ok output is an array of counters. Once
2111	 * we start to read the values in we must
2112	 * put them in there slot to match there CPU and
2113	 * counter being updated. We create a mass array
2114	 * of the counters, filling in the CPU and
2115	 * counter name.
2116	 */
2117	/* How many do we get? */
2118	len = strlen(buffer);
2119	for (i=0, cnt=0; i<len; i++) {
2120		if (strncmp(&buffer[i], "s/", 2) == 0) {
2121			cnt++;
2122			for(;i<len;i++) {
2123				if (buffer[i] == ' ')
2124					break;
2125			}
2126		}
2127	}
2128	mlen = sizeof(struct counters) * cnt;
2129	cnts = malloc(mlen);
2130	ncnts = cnt;
2131	if (cnts == NULL) {
2132		printf("No memory err:%d\n", errno);
2133		return;
2134	}
2135	memset(cnts, 0, mlen);
2136	for (i=0, cnt=0; i<len; i++) {
2137		if (strncmp(&buffer[i], "s/", 2) == 0) {
2138			p = &buffer[i];
2139			for(;i<len;i++) {
2140				if (buffer[i] == ' ') {
2141					buffer[i] = 0;
2142					break;
2143				}
2144			}
2145			process_header(cnt, p);
2146			cnt++;
2147		}
2148	}
2149	if (verbose)
2150		printf("We have %d entries\n", cnt);
2151}
2152extern int max_to_collect;
2153int max_to_collect = MAX_COUNTER_SLOTS;
2154
2155static int
2156read_a_line(FILE *io)
2157{
2158	char buffer[8192], *p, *stop;
2159	int pos, i;
2160
2161	if (fgets(buffer, sizeof(buffer), io) == NULL) {
2162		return(0);
2163	}
2164	p = buffer;
2165	for (i=0; i<ncnts; i++) {
2166		pos = cnts[i].pos;
2167		cnts[i].vals[pos] = strtol(p, &stop, 0);
2168		cnts[i].pos++;
2169		cnts[i].sum += cnts[i].vals[pos];
2170		p = stop;
2171	}
2172	return (1);
2173}
2174
2175extern int cpu_count_out;
2176int cpu_count_out=0;
2177
2178static void
2179print_header(void)
2180{
2181	int i, cnt, printed_cnt;
2182
2183	printf("*********************************\n");
2184	for(i=0, cnt=0; i<MAX_CPU; i++) {
2185		if (glob_cpu[i]) {
2186			cnt++;
2187		}
2188	}
2189	cpu_count_out = cnt;
2190	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2191		if (glob_cpu[i]) {
2192			printf("CPU%d", i);
2193			printed_cnt++;
2194		}
2195		if (printed_cnt == cnt) {
2196			printf("\n");
2197			break;
2198		} else {
2199			printf("\t");
2200		}
2201	}
2202}
2203
2204static void
2205lace_cpus_together(void)
2206{
2207	int i, j, lace_cpu;
2208	struct counters *cpat, *at;
2209
2210	for(i=0; i<ncnts; i++) {
2211		cpat = &cnts[i];
2212		if (cpat->next_cpu) {
2213			/* Already laced in */
2214			continue;
2215		}
2216		lace_cpu = cpat->cpu;
2217		if (lace_cpu >= MAX_CPU) {
2218			printf("CPU %d to big\n", lace_cpu);
2219			continue;
2220		}
2221		if (glob_cpu[lace_cpu] == NULL) {
2222			glob_cpu[lace_cpu] = cpat;
2223		} else {
2224			/* Already processed this cpu */
2225			continue;
2226		}
2227		/* Ok look forward for cpu->cpu and link in */
2228		for(j=(i+1); j<ncnts; j++) {
2229			at = &cnts[j];
2230			if (at->next_cpu) {
2231				continue;
2232			}
2233			if (at->cpu == lace_cpu) {
2234				/* Found one */
2235				cpat->next_cpu = at;
2236				cpat = at;
2237			}
2238		}
2239	}
2240}
2241
2242
2243static void
2244process_file(char *filename)
2245{
2246	FILE *io;
2247	int i;
2248	int line_at, not_done;
2249	pid_t pid_of_command=0;
2250
2251	if (filename ==  NULL) {
2252		io = my_popen(command, "r", &pid_of_command);
2253	} else {
2254		io = fopen(filename, "r");
2255		if (io == NULL) {
2256			printf("Can't process file %s err:%d\n",
2257			       filename, errno);
2258			return;
2259		}
2260	}
2261	build_counters_from_header(io);
2262	if (cnts == NULL) {
2263		/* Nothing we can do */
2264		printf("Nothing to do -- no counters built\n");
2265		if (io) {
2266			fclose(io);
2267		}
2268		return;
2269	}
2270	lace_cpus_together();
2271	print_header();
2272	if (verbose) {
2273		for (i=0; i<ncnts; i++) {
2274			printf("Counter:%s cpu:%d index:%d\n",
2275			       cnts[i].counter_name,
2276			       cnts[i].cpu, i);
2277		}
2278	}
2279	line_at = 0;
2280	not_done = 1;
2281	while(not_done) {
2282		if (read_a_line(io)) {
2283			line_at++;
2284		} else {
2285			break;
2286		}
2287		if (line_at >= max_to_collect) {
2288			not_done = 0;
2289		}
2290		if (filename == NULL) {
2291			int cnt;
2292			/* For the ones we dynamically open we print now */
2293			for(i=0, cnt=0; i<MAX_CPU; i++) {
2294				do_expression(glob_cpu[i], (line_at-1));
2295				cnt++;
2296				if (cnt == cpu_count_out) {
2297					printf("\n");
2298					break;
2299				} else {
2300					printf("\t");
2301				}
2302			}
2303		}
2304	}
2305	if (filename) {
2306		fclose(io);
2307	} else {
2308		my_pclose(io, pid_of_command);
2309	}
2310}
2311#if defined(__amd64__)
2312#define cpuid(in,a,b,c,d)\
2313  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2314
2315static __inline void
2316do_cpuid(u_int ax, u_int cx, u_int *p)
2317{
2318	__asm __volatile("cpuid"
2319			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2320			 :  "0" (ax), "c" (cx) );
2321}
2322
2323#else
2324#define cpuid(in, a, b, c, d)
2325#define do_cpuid(ax, cx, p)
2326#endif
2327
2328static void
2329get_cpuid_set(void)
2330{
2331	unsigned long eax, ebx, ecx, edx;
2332	int model;
2333	pid_t pid_of_command=0;
2334	size_t sz, len;
2335	FILE *io;
2336	char linebuf[1024], *str;
2337	u_int reg[4];
2338
2339	eax = ebx = ecx = edx = 0;
2340
2341	cpuid(0, eax, ebx, ecx, edx);
2342	if (ebx == 0x68747541) {
2343		printf("AMD processors are not supported by this program\n");
2344		printf("Sorry\n");
2345		exit(0);
2346	} else if (ebx == 0x6972794) {
2347		printf("Cyrix processors are not supported by this program\n");
2348		printf("Sorry\n");
2349		exit(0);
2350	} else if (ebx == 0x756e6547) {
2351		printf("Genuine Intel\n");
2352	} else {
2353		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2354		exit(0);
2355	}
2356	cpuid(1, eax, ebx, ecx, edx);
2357	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2358	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2359	switch (eax & 0xF00) {
2360	case 0x500:		/* Pentium family processors */
2361		printf("Intel Pentium P5\n");
2362		goto not_supported;
2363		break;
2364	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
2365		switch (model) {
2366		case 0x1:
2367			printf("Intel Pentium P6\n");
2368			goto not_supported;
2369			break;
2370		case 0x3:
2371		case 0x5:
2372			printf("Intel PII\n");
2373			goto not_supported;
2374			break;
2375		case 0x6: case 0x16:
2376			printf("Intel CL\n");
2377			goto not_supported;
2378			break;
2379		case 0x7: case 0x8: case 0xA: case 0xB:
2380			printf("Intel PIII\n");
2381			goto not_supported;
2382			break;
2383		case 0x9: case 0xD:
2384			printf("Intel PM\n");
2385			goto not_supported;
2386			break;
2387		case 0xE:
2388			printf("Intel CORE\n");
2389			goto not_supported;
2390			break;
2391		case 0xF:
2392			printf("Intel CORE2\n");
2393			goto not_supported;
2394			break;
2395		case 0x17:
2396			printf("Intel CORE2EXTREME\n");
2397			goto not_supported;
2398			break;
2399		case 0x1C:	/* Per Intel document 320047-002. */
2400			printf("Intel ATOM\n");
2401			goto not_supported;
2402			break;
2403		case 0x1A:
2404		case 0x1E:	/*
2405				 * Per Intel document 253669-032 9/2009,
2406				 * pages A-2 and A-57
2407				 */
2408		case 0x1F:	/*
2409				 * Per Intel document 253669-032 9/2009,
2410				 * pages A-2 and A-57
2411				 */
2412			printf("Intel COREI7\n");
2413			goto not_supported;
2414			break;
2415		case 0x2E:
2416			printf("Intel NEHALEM\n");
2417			goto not_supported;
2418			break;
2419		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
2420		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
2421			printf("Intel WESTMERE\n");
2422			goto not_supported;
2423			break;
2424		case 0x2F:	/* Westmere-EX, seen in wild */
2425			printf("Intel WESTMERE\n");
2426			goto not_supported;
2427			break;
2428		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
2429			printf("Intel SANDYBRIDGE\n");
2430			set_sandybridge();
2431			break;
2432		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
2433			printf("Intel SANDYBRIDGE_XEON\n");
2434			set_sandybridge();
2435			break;
2436		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
2437			printf("Intel IVYBRIDGE\n");
2438			set_ivybridge();
2439			break;
2440		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
2441			printf("Intel IVYBRIDGE_XEON\n");
2442			set_ivybridge();
2443			break;
2444		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
2445			printf("Intel HASWELL (Xeon)\n");
2446			set_haswell();
2447			break;
2448		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
2449		case 0x45:
2450		case 0x46:
2451			printf("Intel HASWELL\n");
2452			set_haswell();
2453			break;
2454
2455		case 0x4e:
2456		case 0x5e:
2457			printf("Intel SKY-LAKE\n");
2458			goto not_supported;
2459			break;
2460		case 0x3D:
2461		case 0x47:
2462			printf("Intel BROADWELL\n");
2463			set_broadwell();
2464			break;
2465		case 0x4f:
2466		case 0x56:
2467			printf("Intel BROADWEL (Xeon)\n");
2468			set_broadwell();
2469			break;
2470
2471		case 0x4D:
2472			/* Per Intel document 330061-001 01/2014. */
2473			printf("Intel ATOM_SILVERMONT\n");
2474			goto not_supported;
2475			break;
2476		default:
2477			printf("Intel model 0x%x is not known -- sorry\n",
2478			       model);
2479			goto not_supported;
2480			break;
2481		}
2482		break;
2483	case 0xF00:		/* P4 */
2484		printf("Intel unknown model %d\n", model);
2485		goto not_supported;
2486		break;
2487	}
2488	do_cpuid(0xa, 0, reg);
2489	max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2490	printf("We have %d PMC counters to work with\n", max_pmc_counters);
2491	/* Ok lets load the list of all known PMC's */
2492	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2493	if (valid_pmcs == NULL) {
2494		/* Likely */
2495		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2496		sz = sizeof(char *) * pmc_allocated_cnt;
2497		valid_pmcs = malloc(sz);
2498		if (valid_pmcs == NULL) {
2499			printf("No memory allocation fails at startup?\n");
2500			exit(-1);
2501		}
2502		memset(valid_pmcs, 0, sz);
2503	}
2504
2505	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2506		if (linebuf[0] != '\t') {
2507			/* sometimes headers ;-) */
2508			continue;
2509		}
2510		len = strlen(linebuf);
2511		if (linebuf[(len-1)] == '\n') {
2512			/* Likely */
2513			linebuf[(len-1)] = 0;
2514		}
2515		str = &linebuf[1];
2516		len = strlen(str) + 1;
2517		valid_pmcs[valid_pmc_cnt] = malloc(len);
2518		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2519			printf("No memory2 allocation fails at startup?\n");
2520			exit(-1);
2521		}
2522		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2523		strcpy(valid_pmcs[valid_pmc_cnt], str);
2524		valid_pmc_cnt++;
2525		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2526			/* Got to expand -- unlikely */
2527			char **more;
2528
2529			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2530			more = malloc(sz);
2531			if (more == NULL) {
2532				printf("No memory3 allocation fails at startup?\n");
2533				exit(-1);
2534			}
2535			memset(more, 0, sz);
2536			memcpy(more, valid_pmcs, sz);
2537			pmc_allocated_cnt *= 2;
2538			free(valid_pmcs);
2539			valid_pmcs = more;
2540		}
2541	}
2542	my_pclose(io, pid_of_command);
2543	return;
2544not_supported:
2545	printf("Not supported\n");
2546	exit(-1);
2547}
2548
2549static void
2550explain_all(void)
2551{
2552	int i;
2553	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2554	printf("-------------------------------------------------------------\n");
2555	for(i=0; i<the_cpu.number; i++){
2556		printf("For -e %s ", the_cpu.ents[i].name);
2557		(*the_cpu.explain)(the_cpu.ents[i].name);
2558		printf("----------------------------\n");
2559	}
2560}
2561
/*
 * Try one PMC by running pmcstat on it for CPU 0 and report the result.
 *
 * pmc        -- event name to test.
 * out_so_far -- characters already printed on this line; the result
 *               column is padded out to column 50.
 *
 * The first output line must mention the event name; if it contains
 * "ERROR" or never names the event the test fails.  On success "Pass"
 * is printed followed by the second output line with its leading
 * blanks removed, right aligned in a field of 20.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, j, k;

	for (i = out_so_far; i < 50; i++) {
		printf(" ");
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		/* Refuse to run a cut-off command line. */
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to see is the event name itself */
	len = strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Step over the leading blanks one at a time so
			 * the first digit of the value is never skipped.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			for(k=len; k<20; k++) {
				printf(" ");
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2634
/*
 * Add a copy of 'name' to the string list 'vars' unless it is already
 * among the first 'cur_cnt' entries.
 *
 * Returns 1 when a new entry was stored in vars[cur_cnt] and 0 when
 * the name was already present.  Exits if that slot is already in use
 * or memory cannot be obtained.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **slot;
	size_t bytes;
	int idx = 0;

	while (idx < cur_cnt) {
		if (strcmp(vars[idx], name) == 0) {
			/* Nothing to add, we hold it already */
			return(0);
		}
		idx++;
	}
	slot = &vars[cur_cnt];
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, *slot);
		exit(-1);
	}
	/* A new one, keep our own copy including the terminator */
	bytes = strlen(name) + 1;
	*slot = malloc(bytes);
	if (*slot == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(*slot, name, bytes);
	return(1);
}
2662
2663static char *
2664build_command_for_exp(struct expression *exp)
2665{
2666	/*
2667	 * Build the pmcstat command to handle
2668	 * the passed in expression.
2669	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2670	 * where NNN and QQQ represent the PMC's in the expression
2671	 * uniquely..
2672	 */
2673	char forming[1024];
2674	int cnt_pmc, alloced_pmcs, i;
2675	struct expression *at;
2676	char **vars, *cmd;
2677	size_t mal;
2678
2679	alloced_pmcs = cnt_pmc = 0;
2680	/* first how many do we have */
2681	at = exp;
2682	while (at) {
2683		if (at->type == TYPE_VALUE_PMC) {
2684			cnt_pmc++;
2685		}
2686		at = at->next;
2687	}
2688	if (cnt_pmc == 0) {
2689		printf("No PMC's in your expression -- nothing to do!!\n");
2690		exit(0);
2691	}
2692	mal = cnt_pmc * sizeof(char *);
2693	vars = malloc(mal);
2694	if (vars == NULL) {
2695		printf("No memory\n");
2696		exit(-1);
2697	}
2698	memset(vars, 0, mal);
2699	at = exp;
2700	while (at) {
2701		if (at->type == TYPE_VALUE_PMC) {
2702			if(add_it_to(vars, alloced_pmcs, at->name)) {
2703				alloced_pmcs++;
2704			}
2705		}
2706		at = at->next;
2707	}
2708	/* Now we have a unique list in vars so create our command */
2709	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2710	for(i=0; i<alloced_pmcs; i++) {
2711		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2712	}
2713	cmd = malloc((mal+2));
2714	if (cmd == NULL) {
2715		printf("%s out of mem\n", __FUNCTION__);
2716		exit(-1);
2717	}
2718	memset(cmd, 0, (mal+2));
2719	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2720	at = exp;
2721	for(i=0; i<alloced_pmcs; i++) {
2722		sprintf(forming, " -s %s", vars[i]);
2723		strcat(cmd, forming);
2724		free(vars[i]);
2725		vars[i] = NULL;
2726	}
2727	free(vars);
2728	return(cmd);
2729}
2730
2731static int
2732user_expr(struct counters *cpu, int pos)
2733{
2734	int ret;
2735	double res;
2736	struct counters *var;
2737	struct expression *at;
2738
2739	at = master_exp;
2740	while (at) {
2741		if (at->type == TYPE_VALUE_PMC) {
2742			var = find_counter(cpu, at->name);
2743			if (var == NULL) {
2744				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2745				exit(-1);
2746			}
2747			if (pos != -1) {
2748				at->value = var->vals[pos] * 1.0;
2749			} else {
2750				at->value = var->sum * 1.0;
2751			}
2752		}
2753		at = at->next;
2754	}
2755	res = run_expr(master_exp, 1, NULL);
2756	ret = printf("%1.3f", res);
2757	return(ret);
2758}
2759
2760
2761static void
2762set_manual_exp(struct expression *exp)
2763{
2764	expression = user_expr;
2765	command = build_command_for_exp(exp);
2766	threshold = "User defined threshold";
2767}
2768
2769static void
2770run_tests(void)
2771{
2772	int i, lenout;
2773	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2774	printf("------------------------------------------------------------------------\n");
2775	for(i=0; i<valid_pmc_cnt; i++) {
2776		lenout = printf("%s", valid_pmcs[i]);
2777		fflush(stdout);
2778		test_for_a_pmc(valid_pmcs[i], lenout);
2779	}
2780}
2781static void
2782list_all(void)
2783{
2784	int i, cnt, j;
2785	printf("PMC                                               Abbreviation\n");
2786	printf("--------------------------------------------------------------\n");
2787	for(i=0; i<valid_pmc_cnt; i++) {
2788		cnt = printf("%s", valid_pmcs[i]);
2789		for(j=cnt; j<52; j++) {
2790			printf(" ");
2791		}
2792		printf("%%%d\n", i);
2793	}
2794}
2795
2796
2797int
2798main(int argc, char **argv)
2799{
2800	int i, j, cnt;
2801	char *filename=NULL;
2802	const char *name=NULL;
2803	int help_only = 0;
2804	int test_mode = 0;
2805	int test_at = 0;
2806
2807	get_cpuid_set();
2808	memset(glob_cpu, 0, sizeof(glob_cpu));
2809	while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2810		switch (i) {
2811		case 'A':
2812			run_all = 1;
2813			break;
2814		case 'L':
2815			list_all();
2816			return(0);
2817		case 'H':
2818			printf("**********************************\n");
2819			explain_all();
2820			printf("**********************************\n");
2821			return(0);
2822			break;
2823		case 'T':
2824			test_mode = 1;
2825			break;
2826		case 'E':
2827			master_exp = parse_expression(optarg);
2828			if (master_exp) {
2829				set_manual_exp(master_exp);
2830			}
2831			break;
2832		case 'e':
2833			if (validate_expression(optarg)) {
2834				printf("Unknown expression %s\n", optarg);
2835				return(0);
2836			}
2837			name = optarg;
2838			set_expression(optarg);
2839			break;
2840		case 'm':
2841			max_to_collect = strtol(optarg, NULL, 0);
2842			if (max_to_collect > MAX_COUNTER_SLOTS) {
2843				/* You can't collect more than max in array */
2844				max_to_collect = MAX_COUNTER_SLOTS;
2845			}
2846			break;
2847		case 'v':
2848			verbose++;
2849			break;
2850		case 'h':
2851			help_only = 1;
2852			break;
2853		case 'i':
2854			filename = optarg;
2855			break;
2856		case '?':
2857		default:
2858		use:
2859			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2860			       argv[0]);
2861			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2862			printf("-v -- verbose dump debug type things -- you don't want this\n");
2863			printf("-m N -- maximum to collect is N measurments\n");
2864			printf("-e expr-name -- Do expression expr-name\n");
2865			printf("-E 'your expression' -- Do your expression\n");
2866			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2867			printf("-H -- Don't run anything, just explain all canned expressions\n");
2868			printf("-T -- Test all PMC's defined by this processor\n");
2869			printf("-A -- Run all canned tests\n");
2870			return(0);
2871			break;
2872		}
2873	}
2874	if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2875	    (test_mode == 0) && (master_exp == NULL)) {
2876		printf("Without setting an expression we cannot dynamically gather information\n");
2877		printf("you must supply a filename (and you probably want verbosity)\n");
2878		goto use;
2879	}
2880	if (run_all && max_to_collect > 10) {
2881		max_to_collect = 3;
2882	}
2883	if (test_mode) {
2884		run_tests();
2885		return(0);
2886	}
2887	printf("*********************************\n");
2888	if ((master_exp == NULL) && name) {
2889		(*the_cpu.explain)(name);
2890	} else if (master_exp) {
2891		printf("Examine your expression ");
2892		print_exp(master_exp);
2893		printf("User defined threshold\n");
2894	}
2895	if (help_only) {
2896		return(0);
2897	}
2898	if (run_all) {
2899	more:
2900		name = the_cpu.ents[test_at].name;
2901		printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2902		test_at++;
2903		if (set_expression(name) == -1) {
2904			if (test_at >= the_cpu.number) {
2905				goto done;
2906			} else
2907				goto more;
2908		}
2909
2910	}
2911	process_file(filename);
2912	if (verbose >= 2) {
2913		for (i=0; i<ncnts; i++) {
2914			printf("Counter:%s cpu:%d index:%d\n",
2915			       cnts[i].counter_name,
2916			       cnts[i].cpu, i);
2917			for(j=0; j<cnts[i].pos; j++) {
2918				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2919			}
2920			printf(" sum - %ld\n", (long int)cnts[i].sum);
2921		}
2922	}
2923	if (expression == NULL) {
2924		return(0);
2925	}
2926	if (max_to_collect > 1) {
2927		for(i=0, cnt=0; i<MAX_CPU; i++) {
2928			if (glob_cpu[i]) {
2929				do_expression(glob_cpu[i], -1);
2930				cnt++;
2931				if (cnt == cpu_count_out) {
2932					printf("\n");
2933					break;
2934				} else {
2935					printf("\t");
2936				}
2937			}
2938		}
2939	}
2940	if (run_all && (test_at < the_cpu.number)) {
2941		memset(glob_cpu, 0, sizeof(glob_cpu));
2942		ncnts = 0;
2943		printf("*********************************\n");
2944		goto more;
2945	} else if (run_all) {
2946	done:
2947		printf("*********************************\n");
2948	}
2949	return(0);
2950}
2951