1/*-
2 * Copyright (c) 2014, 2015 Netflix Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer,
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28#include <sys/types.h>
29#include <stdio.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include <string.h>
33#include <strings.h>
34#include <sys/errno.h>
35#include <signal.h>
36#include <sys/wait.h>
37#include <getopt.h>
38#include "eval_expr.h"
39__FBSDID("$FreeBSD: releng/10.3/usr.sbin/pmcstudy/pmcstudy.c 285849 2015-07-24 19:09:11Z emaste $");
40
/*
 * Fixed sizing limits: MAX_COUNTER_SLOTS bounds the vals[] array and
 * MAX_NLEN the counter_name[] buffer of struct counters; MAX_CPU sizes
 * the glob_cpu[] table.
 */
#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64

/* Verbosity setting; starts out off. */
static int verbose = 0;

/* Process environment, passed on to the shell started by my_popen(). */
extern char **environ;

/*
 * Each global below is declared extern and then defined right away, so a
 * declaration is always in scope ahead of the definition.
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

#define PMC_INITIAL_ALLOC 512

/* Table of PMC name strings plus its used and allocated counts. */
extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
57
58/*
59 * The following two varients on popen and pclose with
60 * the cavet that they get you the PID so that you
61 * can supply it to pclose so it can send a SIGTERM
62 *  to the process.
63 */
64static FILE *
65my_popen(const char *command, const char *dir, pid_t *p_pid)
66{
67	FILE *io_out, *io_in;
68	int pdesin[2], pdesout[2];
69	char *argv[4];
70	pid_t pid;
71	char cmd[4];
72	char cmd2[1024];
73	char arg1[4];
74
75	if ((strcmp(dir, "r") != 0) &&
76	    (strcmp(dir, "w") != 0)) {
77		errno = EINVAL;
78		return(NULL);
79	}
80	if (pipe(pdesin) < 0)
81		return (NULL);
82
83	if (pipe(pdesout) < 0) {
84		(void)close(pdesin[0]);
85		(void)close(pdesin[1]);
86		return (NULL);
87	}
88	strcpy(cmd, "sh");
89	strcpy(arg1, "-c");
90	strcpy(cmd2, command);
91	argv[0] = cmd;
92	argv[1] = arg1;
93	argv[2] = cmd2;
94	argv[3] = NULL;
95
96	switch (pid = fork()) {
97	case -1:			/* Error. */
98		(void)close(pdesin[0]);
99		(void)close(pdesin[1]);
100		(void)close(pdesout[0]);
101		(void)close(pdesout[1]);
102		return (NULL);
103		/* NOTREACHED */
104	case 0:				/* Child. */
105		/* Close out un-used sides */
106		(void)close(pdesin[1]);
107		(void)close(pdesout[0]);
108		/* Now prepare the stdin of the process */
109		close(0);
110		(void)dup(pdesin[0]);
111		(void)close(pdesin[0]);
112		/* Now prepare the stdout of the process */
113		close(1);
114		(void)dup(pdesout[1]);
115		/* And lets do stderr just in case */
116		close(2);
117		(void)dup(pdesout[1]);
118		(void)close(pdesout[1]);
119		/* Now run it */
120		execve("/bin/sh", argv, environ);
121		exit(127);
122		/* NOTREACHED */
123	}
124	/* Parent; assume fdopen can't fail. */
125	/* Store the pid */
126	*p_pid = pid;
127	if (strcmp(dir, "r") != 0) {
128		io_out = fdopen(pdesin[1], "w");
129		(void)close(pdesin[0]);
130		(void)close(pdesout[0]);
131		(void)close(pdesout[1]);
132		return(io_out);
133 	} else {
134		/* Prepare the input stream */
135		io_in = fdopen(pdesout[0], "r");
136		(void)close(pdesout[1]);
137		(void)close(pdesin[0]);
138		(void)close(pdesin[1]);
139		return (io_in);
140	}
141}
142
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream, send SIGTERM to the
 *	child identified by the_pid and wait for it.  Nothing is returned
 *	and the child's wait status is discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;
	pid_t reaped;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	/* Keep waiting for as long as the wait is merely interrupted. */
	for (;;) {
		reaped = wait4(the_pid, &status, 0, (struct rusage *)0);
		if (reaped != -1 || errno != EINTR)
			break;
	}
}
164
165struct counters {
166	struct counters *next_cpu;
167	char counter_name[MAX_NLEN];		/* Name of counter */
168	int cpu;				/* CPU we are on */
169	int pos;				/* Index we are filling to. */
170	uint64_t vals[MAX_COUNTER_SLOTS];	/* Last 64 entries */
171	uint64_t sum;				/* Summary of entries */
172};
173
174extern struct counters *glob_cpu[MAX_CPU];
175struct counters *glob_cpu[MAX_CPU];
176
177extern struct counters *cnts;
178struct counters *cnts=NULL;
179
180extern int ncnts;
181int ncnts=0;
182
183extern int (*expression)(struct counters *, int);
184int (*expression)(struct counters *, int);
185
186static const char *threshold=NULL;
187static const char *command;
188
/*
 * Description of one metric: its name, threshold text, command string
 * and the routine that evaluates it.
 */
struct cpu_entry {
	/* Metric name */
	const char *name;
	/* Threshold description */
	const char *thresh;
	/* Command string associated with the metric */
	const char *command;
	/* Evaluator: takes a counter list and a slot index, returns int */
	int (*func)(struct counters *, int);
};
195
196
/*
 * Description of a CPU family: a type string, an entry count, the
 * entries themselves and a routine that explains an entry by name.
 */
struct cpu_type {
	/* CPU type string (up to 31 characters plus the terminator) */
	char cputype[32];
	/* Count kept alongside ents -- presumably its length; confirm at use */
	int number;
	/* Metric entries for this CPU type */
	struct cpu_entry *ents;
	/* Prints an explanation for the metric called "name" */
	void (*explain)(const char *name);
};

/* The single global CPU description. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205
/*
 * explain_name_sb --
 *	Print, on stdout, the equation behind the metric called "name"
 *	followed by a sentence quoting the threshold to look for.  A name
 *	that is not in the table produces "Unknown name:<name>" and the
 *	threshold text "unknown entry".
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *metric;	/* name compared against the argument */
		const char *formula;	/* equation text, printed verbatim */
		const char *thresh;	/* text spliced into the last sentence */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this text. */
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].metric) == 0)
			break;
	}
	if (i < nnotes) {
		/* The formula texts contain no conversions; emit them as-is. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284
/*
 * explain_name_ib --
 *	Print, on stdout, the equation behind the metric called "name"
 *	followed by a sentence quoting the threshold to look for.  A name
 *	that is not in the table produces "Unknown name:<name>" and the
 *	threshold text "unknown entry".
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *metric;	/* name compared against the argument */
		const char *formula;	/* equation text, printed verbatim */
		const char *thresh;	/* text spliced into the last sentence */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/*
		 * This text used to omit the factor of 29 that cache2ib()
		 * applies, and its parentheses did not balance.
		 */
		{ "cache2",
		  "Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].metric) == 0)
			break;
	}
	if (i < nnotes) {
		/* The formula texts contain no conversions; emit them as-is. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366
367
/*
 * explain_name_has --
 *	Print, on stdout, the equation behind the metric called "name"
 *	followed by a sentence quoting the threshold to look for.  A name
 *	that is not in the table produces "Unknown name:<name>" and the
 *	threshold text "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *metric;	/* name compared against the argument */
		const char *formula;	/* equation text, printed verbatim */
		const char *thresh;	/* text spliced into the last sentence */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].metric) == 0)
			break;
	}
	if (i < nnotes) {
		/* The formula texts contain no conversions; emit them as-is. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445
446
447static struct counters *
448find_counter(struct counters *base, const char *name)
449{
450	struct counters *at;
451	int len;
452
453	at = base;
454	len = strlen(name);
455	while(at) {
456		if (strncmp(at->counter_name, name, len) == 0) {
457			return(at);
458		}
459		at = at->next_cpu;
460	}
461	printf("Can't find counter %s\n", name);
462	printf("We have:\n");
463	at = base;
464	while(at) {
465		printf("- %s\n", at->counter_name);
466		at = at->next_cpu;
467	}
468	exit(-1);
469}
470
471static int
472allocstall1(struct counters *cpu, int pos)
473{
474/*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
475	int ret;
476	struct counters *partial;
477	struct counters *unhalt;
478	double un, par, res;
479	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
480	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
481	if (pos != -1) {
482		par = partial->vals[pos] * 1.0;
483		un = unhalt->vals[pos] * 1.0;
484	} else {
485		par = partial->sum * 1.0;
486		un = unhalt->sum * 1.0;
487	}
488	res = par/un;
489	ret = printf("%1.3f", res);
490	return(ret);
491}
492
493static int
494allocstall2(struct counters *cpu, int pos)
495{
496/*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
497	int ret;
498	struct counters *partial;
499	struct counters *unhalt;
500	double un, par, res;
501	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
502	partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
503	if (pos != -1) {
504		par = partial->vals[pos] * 1.0;
505		un = unhalt->vals[pos] * 1.0;
506	} else {
507		par = partial->sum * 1.0;
508		un = unhalt->sum * 1.0;
509	}
510	res = par/un;
511	ret = printf("%1.3f", res);
512	return(ret);
513}
514
515static int
516br_mispredict(struct counters *cpu, int pos)
517{
518	struct counters *brctr;
519	struct counters *unhalt;
520	int ret;
521/*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
522	double br, un, con, res;
523	con = 20.0;
524
525	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
526        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
527	if (pos != -1) {
528		br = brctr->vals[pos] * 1.0;
529		un = unhalt->vals[pos] * 1.0;
530	} else {
531		br = brctr->sum * 1.0;
532		un = unhalt->sum * 1.0;
533	}
534	res = (con * br)/un;
535 	ret = printf("%1.3f", res);
536	return(ret);
537}
538
539static int
540br_mispredictib(struct counters *cpu, int pos)
541{
542	struct counters *brctr;
543	struct counters *unhalt;
544	struct counters *clear, *clear2, *clear3;
545	struct counters *uops;
546	struct counters *recv;
547	struct counters *iss;
548/*	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
549	int ret;
550        /*
551	 * (BR_MISP_RETIRED.ALL_BRANCHES /
552	 *         (BR_MISP_RETIRED.ALL_BRANCHES +
553	 *          MACHINE_CLEAR.COUNT) *
554	 *	   ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
555	 *
556	 */
557	double br, cl, cl2, cl3, uo, re, un, con, res, is;
558	con = 4.0;
559
560	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
561        brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
562	clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
563	clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
564	clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
565	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
566	iss = find_counter(cpu, "UOPS_ISSUED.ANY");
567	recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
568	if (pos != -1) {
569		br = brctr->vals[pos] * 1.0;
570		cl = clear->vals[pos] * 1.0;
571		cl2 = clear2->vals[pos] * 1.0;
572		cl3 = clear3->vals[pos] * 1.0;
573		uo = uops->vals[pos] * 1.0;
574		re = recv->vals[pos] * 1.0;
575		is = iss->vals[pos] * 1.0;
576		un = unhalt->vals[pos] * 1.0;
577	} else {
578		br = brctr->sum * 1.0;
579		cl = clear->sum * 1.0;
580		cl2 = clear2->sum * 1.0;
581		cl3 = clear3->sum * 1.0;
582		uo = uops->sum * 1.0;
583		re = recv->sum * 1.0;
584		is = iss->sum * 1.0;
585		un = unhalt->sum * 1.0;
586	}
587	res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
588 	ret = printf("%1.3f", res);
589	return(ret);
590}
591
592static int
593splitloadib(struct counters *cpu, int pos)
594{
595	int ret;
596	struct counters *mem;
597	struct counters *l1d, *ldblock;
598	struct counters *unhalt;
599	double un, memd, res, l1, ldb;
600        /*
601	 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
602	 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
603	 */
604
605	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
606	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
607	l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
608	ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
609	if (pos != -1) {
610		memd = mem->vals[pos] * 1.0;
611		l1 = l1d->vals[pos] * 1.0;
612		ldb = ldblock->vals[pos] * 1.0;
613		un = unhalt->vals[pos] * 1.0;
614	} else {
615		memd = mem->sum * 1.0;
616		l1 = l1d->sum * 1.0;
617		ldb = ldblock->sum * 1.0;
618		un = unhalt->sum * 1.0;
619	}
620	res = ((l1 / memd) * ldb)/un;
621	ret = printf("%1.3f", res);
622	return(ret);
623}
624
625static int
626splitload(struct counters *cpu, int pos)
627{
628	int ret;
629	struct counters *mem;
630	struct counters *unhalt;
631	double con, un, memd, res;
632/*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
633
634	con = 5.0;
635	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
636	mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
637	if (pos != -1) {
638		memd = mem->vals[pos] * 1.0;
639		un = unhalt->vals[pos] * 1.0;
640	} else {
641		memd = mem->sum * 1.0;
642		un = unhalt->sum * 1.0;
643	}
644	res = (memd * con)/un;
645	ret = printf("%1.3f", res);
646	return(ret);
647}
648
649static int
650splitstore(struct counters *cpu, int pos)
651{
652        /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
653	int ret;
654	struct counters *mem_split;
655	struct counters *mem_stores;
656	double memsplit, memstore, res;
657	mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
658	mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
659	if (pos != -1) {
660		memsplit = mem_split->vals[pos] * 1.0;
661		memstore = mem_stores->vals[pos] * 1.0;
662	} else {
663		memsplit = mem_split->sum * 1.0;
664		memstore = mem_stores->sum * 1.0;
665	}
666	res = memsplit/memstore;
667	ret = printf("%1.3f", res);
668	return(ret);
669}
670
671
672static int
673contested(struct counters *cpu, int pos)
674{
675        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
676	int ret;
677	struct counters *mem;
678	struct counters *unhalt;
679	double con, un, memd, res;
680
681	con = 60.0;
682	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
684	if (pos != -1) {
685		memd = mem->vals[pos] * 1.0;
686		un = unhalt->vals[pos] * 1.0;
687	} else {
688		memd = mem->sum * 1.0;
689		un = unhalt->sum * 1.0;
690	}
691	res = (memd * con)/un;
692	ret = printf("%1.3f", res);
693	return(ret);
694}
695
696static int
697contested_has(struct counters *cpu, int pos)
698{
699        /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
700	int ret;
701	struct counters *mem;
702	struct counters *unhalt;
703	double con, un, memd, res;
704
705	con = 84.0;
706	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
708	if (pos != -1) {
709		memd = mem->vals[pos] * 1.0;
710		un = unhalt->vals[pos] * 1.0;
711	} else {
712		memd = mem->sum * 1.0;
713		un = unhalt->sum * 1.0;
714	}
715	res = (memd * con)/un;
716	ret = printf("%1.3f", res);
717	return(ret);
718}
719
720
721static int
722blockstoreforward(struct counters *cpu, int pos)
723{
724        /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
725	int ret;
726	struct counters *ldb;
727	struct counters *unhalt;
728	double con, un, ld, res;
729
730	con = 13.0;
731	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
732	ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
733	if (pos != -1) {
734		ld = ldb->vals[pos] * 1.0;
735		un = unhalt->vals[pos] * 1.0;
736	} else {
737		ld = ldb->sum * 1.0;
738		un = unhalt->sum * 1.0;
739	}
740	res = (ld * con)/un;
741	ret = printf("%1.3f", res);
742	return(ret);
743}
744
745static int
746cache2(struct counters *cpu, int pos)
747{
748	/* ** Suspect ***
749	 *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
750	 *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
751	 */
752	int ret;
753	struct counters *mem1, *mem2, *mem3;
754	struct counters *unhalt;
755	double con1, con2, con3, un, me_1, me_2, me_3, res;
756
757	con1 = 26.0;
758	con2 = 43.0;
759	con3 = 60.0;
760	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
761/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
762	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
763	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
764	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
765	if (pos != -1) {
766		me_1 = mem1->vals[pos] * 1.0;
767		me_2 = mem2->vals[pos] * 1.0;
768		me_3 = mem3->vals[pos] * 1.0;
769		un = unhalt->vals[pos] * 1.0;
770	} else {
771		me_1 = mem1->sum * 1.0;
772		me_2 = mem2->sum * 1.0;
773		me_3 = mem3->sum * 1.0;
774		un = unhalt->sum * 1.0;
775	}
776	res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
777	ret = printf("%1.3f", res);
778	return(ret);
779}
780
781static int
782datasharing(struct counters *cpu, int pos)
783{
784	/*
785	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
786	 */
787	int ret;
788	struct counters *mem;
789	struct counters *unhalt;
790	double con, res, me, un;
791
792	con = 43.0;
793	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
794	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
795	if (pos != -1) {
796		me = mem->vals[pos] * 1.0;
797		un = unhalt->vals[pos] * 1.0;
798	} else {
799		me = mem->sum * 1.0;
800		un = unhalt->sum * 1.0;
801	}
802	res = (me * con)/un;
803	ret = printf("%1.3f", res);
804	return(ret);
805
806}
807
808
809static int
810datasharing_has(struct counters *cpu, int pos)
811{
812	/*
813	 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
814	 */
815	int ret;
816	struct counters *mem;
817	struct counters *unhalt;
818	double con, res, me, un;
819
820	con = 72.0;
821	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
822	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
823	if (pos != -1) {
824		me = mem->vals[pos] * 1.0;
825		un = unhalt->vals[pos] * 1.0;
826	} else {
827		me = mem->sum * 1.0;
828		un = unhalt->sum * 1.0;
829	}
830	res = (me * con)/un;
831	ret = printf("%1.3f", res);
832	return(ret);
833
834}
835
836
837static int
838cache2ib(struct counters *cpu, int pos)
839{
840        /*
841	 *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
842	 */
843	int ret;
844	struct counters *mem;
845	struct counters *unhalt;
846	double con, un, me, res;
847
848	con = 29.0;
849	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
850	mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
851	if (pos != -1) {
852		me = mem->vals[pos] * 1.0;
853		un = unhalt->vals[pos] * 1.0;
854	} else {
855		me = mem->sum * 1.0;
856		un = unhalt->sum * 1.0;
857	}
858	res = (con * me)/un;
859	ret = printf("%1.3f", res);
860	return(ret);
861}
862
863static int
864cache2has(struct counters *cpu, int pos)
865{
866	/*
867	 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
868	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
869	 *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
870	 *           / CPU_CLK_UNHALTED.THREAD_P
871	 */
872	int ret;
873	struct counters *mem1, *mem2, *mem3;
874	struct counters *unhalt;
875	double con1, con2, con3, un, me1, me2, me3, res;
876
877	con1 = 36.0;
878	con2 = 72.0;
879	con3 = 84.0;
880	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
881	mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
882	mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
883	mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
884	if (pos != -1) {
885		me1 = mem1->vals[pos] * 1.0;
886		me2 = mem2->vals[pos] * 1.0;
887		me3 = mem3->vals[pos] * 1.0;
888		un = unhalt->vals[pos] * 1.0;
889	} else {
890		me1 = mem1->sum * 1.0;
891		me2 = mem2->sum * 1.0;
892		me3 = mem3->sum * 1.0;
893		un = unhalt->sum * 1.0;
894	}
895	res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
896	ret = printf("%1.3f", res);
897	return(ret);
898}
899
900static int
901cache1(struct counters *cpu, int pos)
902{
903	/*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
904	int ret;
905	struct counters *mem;
906	struct counters *unhalt;
907	double con, un, me, res;
908
909	con = 180.0;
910	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
911	mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
912	if (pos != -1) {
913		me = mem->vals[pos] * 1.0;
914		un = unhalt->vals[pos] * 1.0;
915	} else {
916		me = mem->sum * 1.0;
917		un = unhalt->sum * 1.0;
918	}
919	res = (me * con)/un;
920	ret = printf("%1.3f", res);
921	return(ret);
922}
923
924static int
925cache1ib(struct counters *cpu, int pos)
926{
927	/*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
928	int ret;
929	struct counters *mem;
930	struct counters *unhalt;
931	double con, un, me, res;
932
933	con = 180.0;
934	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
935	mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
936	if (pos != -1) {
937		me = mem->vals[pos] * 1.0;
938		un = unhalt->vals[pos] * 1.0;
939	} else {
940		me = mem->sum * 1.0;
941		un = unhalt->sum * 1.0;
942	}
943	res = (me * con)/un;
944	ret = printf("%1.3f", res);
945	return(ret);
946}
947
948
949static int
950dtlb_missload(struct counters *cpu, int pos)
951{
952	/* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
953	int ret;
954	struct counters *dtlb_m, *dtlb_d;
955	struct counters *unhalt;
956	double con, un, d1, d2, res;
957
958	con = 7.0;
959	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
960	dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
961	dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
962	if (pos != -1) {
963		d1 = dtlb_m->vals[pos] * 1.0;
964		d2 = dtlb_d->vals[pos] * 1.0;
965		un = unhalt->vals[pos] * 1.0;
966	} else {
967		d1 = dtlb_m->sum * 1.0;
968		d2 = dtlb_d->sum * 1.0;
969		un = unhalt->sum * 1.0;
970	}
971	res = ((d1 * con) + d2)/un;
972	ret = printf("%1.3f", res);
973	return(ret);
974}
975
976static int
977dtlb_missstore(struct counters *cpu, int pos)
978{
979        /*
980	 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
981	 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
982	 */
983        int ret;
984        struct counters *dtsb_m, *dtsb_d;
985        struct counters *unhalt;
986        double con, un, d1, d2, res;
987
988        con = 7.0;
989        unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
990        dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
991        dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
992        if (pos != -1) {
993                d1 = dtsb_m->vals[pos] * 1.0;
994                d2 = dtsb_d->vals[pos] * 1.0;
995                un = unhalt->vals[pos] * 1.0;
996        } else {
997                d1 = dtsb_m->sum * 1.0;
998                d2 = dtsb_d->sum * 1.0;
999                un = unhalt->sum * 1.0;
1000        }
1001        res = ((d1 * con) + d2)/un;
1002        ret = printf("%1.3f", res);
1003        return(ret);
1004}
1005
1006static int
1007itlb_miss(struct counters *cpu, int pos)
1008{
1009	/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1010	int ret;
1011	struct counters *itlb;
1012	struct counters *unhalt;
1013	double un, d1, res;
1014
1015	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1016	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1017	if (pos != -1) {
1018		d1 = itlb->vals[pos] * 1.0;
1019		un = unhalt->vals[pos] * 1.0;
1020	} else {
1021		d1 = itlb->sum * 1.0;
1022		un = unhalt->sum * 1.0;
1023	}
1024	res = d1/un;
1025	ret = printf("%1.3f", res);
1026	return(ret);
1027}
1028
1029static int
1030icache_miss(struct counters *cpu, int pos)
1031{
1032	/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1033
1034	int ret;
1035	struct counters *itlb, *icache;
1036	struct counters *unhalt;
1037	double un, d1, ic, res;
1038
1039	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040	itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1041	icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1042	if (pos != -1) {
1043		d1 = itlb->vals[pos] * 1.0;
1044		ic = icache->vals[pos] * 1.0;
1045		un = unhalt->vals[pos] * 1.0;
1046	} else {
1047		d1 = itlb->sum * 1.0;
1048		ic = icache->sum * 1.0;
1049		un = unhalt->sum * 1.0;
1050	}
1051	res = (ic-d1)/un;
1052	ret = printf("%1.3f", res);
1053	return(ret);
1054
1055}
1056
1057static int
1058icache_miss_has(struct counters *cpu, int pos)
1059{
1060	/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1061
1062	int ret;
1063	struct counters *icache;
1064	struct counters *unhalt;
1065	double un, con, ic, res;
1066
1067	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1068	icache = find_counter(cpu, "ICACHE.MISSES");
1069	con = 36.0;
1070	if (pos != -1) {
1071		ic = icache->vals[pos] * 1.0;
1072		un = unhalt->vals[pos] * 1.0;
1073	} else {
1074		ic = icache->sum * 1.0;
1075		un = unhalt->sum * 1.0;
1076	}
1077	res = (con * ic)/un;
1078	ret = printf("%1.3f", res);
1079	return(ret);
1080
1081}
1082
1083static int
1084lcp_stall(struct counters *cpu, int pos)
1085{
1086         /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1087	int ret;
1088	struct counters *ild;
1089	struct counters *unhalt;
1090	double un, d1, res;
1091
1092	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1093	ild = find_counter(cpu, "ILD_STALL.LCP");
1094	if (pos != -1) {
1095		d1 = ild->vals[pos] * 1.0;
1096		un = unhalt->vals[pos] * 1.0;
1097	} else {
1098		d1 = ild->sum * 1.0;
1099		un = unhalt->sum * 1.0;
1100	}
1101	res = d1/un;
1102	ret = printf("%1.3f", res);
1103	return(ret);
1104
1105}
1106
1107
1108static int
1109frontendstall(struct counters *cpu, int pos)
1110{
1111      /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1112	int ret;
1113	struct counters *idq;
1114	struct counters *unhalt;
1115	double con, un, id, res;
1116
1117	con = 4.0;
1118	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1119	idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1120	if (pos != -1) {
1121		id = idq->vals[pos] * 1.0;
1122		un = unhalt->vals[pos] * 1.0;
1123	} else {
1124		id = idq->sum * 1.0;
1125		un = unhalt->sum * 1.0;
1126	}
1127	res = id/(un * con);
1128	ret = printf("%1.3f", res);
1129	return(ret);
1130}
1131
1132static int
1133clears(struct counters *cpu, int pos)
1134{
1135	/* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1136	 *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1137
1138	int ret;
1139	struct counters *clr1, *clr2, *clr3;
1140	struct counters *unhalt;
1141	double con, un, cl1, cl2, cl3, res;
1142
1143	con = 100.0;
1144	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1145	clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1146	clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1147	clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1148
1149	if (pos != -1) {
1150		cl1 = clr1->vals[pos] * 1.0;
1151		cl2 = clr2->vals[pos] * 1.0;
1152		cl3 = clr3->vals[pos] * 1.0;
1153		un = unhalt->vals[pos] * 1.0;
1154	} else {
1155		cl1 = clr1->sum * 1.0;
1156		cl2 = clr2->sum * 1.0;
1157		cl3 = clr3->sum * 1.0;
1158		un = unhalt->sum * 1.0;
1159	}
1160	res = ((cl1 + cl2 + cl3) * con)/un;
1161	ret = printf("%1.3f", res);
1162	return(ret);
1163}
1164
1165static int
1166microassist(struct counters *cpu, int pos)
1167{
1168	/* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1169	int ret;
1170	struct counters *idq;
1171	struct counters *unhalt;
1172	double un, id, res, con;
1173
1174	con = 4.0;
1175	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1176	idq = find_counter(cpu, "IDQ.MS_UOPS");
1177	if (pos != -1) {
1178		id = idq->vals[pos] * 1.0;
1179		un = unhalt->vals[pos] * 1.0;
1180	} else {
1181		id = idq->sum * 1.0;
1182		un = unhalt->sum * 1.0;
1183	}
1184	res = id/(un * con);
1185	ret = printf("%1.3f", res);
1186	return(ret);
1187}
1188
1189
1190static int
1191aliasing(struct counters *cpu, int pos)
1192{
1193	/* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1194	int ret;
1195	struct counters *ld;
1196	struct counters *unhalt;
1197	double un, lds, con, res;
1198
1199	con = 5.0;
1200	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1201	ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1202	if (pos != -1) {
1203		lds = ld->vals[pos] * 1.0;
1204		un = unhalt->vals[pos] * 1.0;
1205	} else {
1206		lds = ld->sum * 1.0;
1207		un = unhalt->sum * 1.0;
1208	}
1209	res = (lds * con)/un;
1210	ret = printf("%1.3f", res);
1211	return(ret);
1212}
1213
1214static int
1215fpassists(struct counters *cpu, int pos)
1216{
1217	/* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1218	int ret;
1219	struct counters *fp;
1220	struct counters *inst;
1221	double un, fpd, res;
1222
1223	inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1224	fp = find_counter(cpu, "FP_ASSIST.ANY");
1225	if (pos != -1) {
1226		fpd = fp->vals[pos] * 1.0;
1227		un = inst->vals[pos] * 1.0;
1228	} else {
1229		fpd = fp->sum * 1.0;
1230		un = inst->sum * 1.0;
1231	}
1232	res = fpd/un;
1233	ret = printf("%1.3f", res);
1234	return(ret);
1235}
1236
1237static int
1238otherassistavx(struct counters *cpu, int pos)
1239{
1240	/* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1241	int ret;
1242	struct counters *oth;
1243	struct counters *unhalt;
1244	double un, ot, con, res;
1245
1246	con = 75.0;
1247	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248	oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1249	if (pos != -1) {
1250		ot = oth->vals[pos] * 1.0;
1251		un = unhalt->vals[pos] * 1.0;
1252	} else {
1253		ot = oth->sum * 1.0;
1254		un = unhalt->sum * 1.0;
1255	}
1256	res = (ot * con)/un;
1257	ret = printf("%1.3f", res);
1258	return(ret);
1259}
1260
1261static int
1262otherassistsse(struct counters *cpu, int pos)
1263{
1264
1265	int ret;
1266	struct counters *oth;
1267	struct counters *unhalt;
1268	double un, ot, con, res;
1269
1270	/* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1271	con = 75.0;
1272	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273	oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1274	if (pos != -1) {
1275		ot = oth->vals[pos] * 1.0;
1276		un = unhalt->vals[pos] * 1.0;
1277	} else {
1278		ot = oth->sum * 1.0;
1279		un = unhalt->sum * 1.0;
1280	}
1281	res = (ot * con)/un;
1282	ret = printf("%1.3f", res);
1283	return(ret);
1284}
1285
1286static int
1287efficiency1(struct counters *cpu, int pos)
1288{
1289
1290	int ret;
1291	struct counters *uops;
1292	struct counters *unhalt;
1293	double un, ot, con, res;
1294
1295        /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1296	con = 4.0;
1297	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298	uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1299	if (pos != -1) {
1300		ot = uops->vals[pos] * 1.0;
1301		un = unhalt->vals[pos] * 1.0;
1302	} else {
1303		ot = uops->sum * 1.0;
1304		un = unhalt->sum * 1.0;
1305	}
1306	res = ot/(con * un);
1307	ret = printf("%1.3f", res);
1308	return(ret);
1309}
1310
1311static int
1312efficiency2(struct counters *cpu, int pos)
1313{
1314
1315	int ret;
1316	struct counters *uops;
1317	struct counters *unhalt;
1318	double un, ot, res;
1319
1320        /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1321	unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1322	uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1323	if (pos != -1) {
1324		ot = uops->vals[pos] * 1.0;
1325		un = unhalt->vals[pos] * 1.0;
1326	} else {
1327		ot = uops->sum * 1.0;
1328		un = unhalt->sum * 1.0;
1329	}
1330	res = un/ot;
1331	ret = printf("%1.3f", res);
1332	return(ret);
1333}
1334
/*
 * Expression table for Sandy Bridge CPUs (installed by set_sandybridge()).
 * Each entry lists, in order: the expression name matched by
 * set_expression()/validate_expression(), a human-readable threshold hint,
 * the pmcstat command line that collects the needed counters, and the
 * function that evaluates the expression from those counters.
 * SANDY_BRIDGE_COUNT must equal the number of entries below.
 */
#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/	{ "allocstall1", "thresh > .05",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
	  allocstall1 },
/*02*/	{ "allocstall2", "thresh > .05",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
	  allocstall2 },
/*03*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
	  br_mispredict },
/*04*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
	  splitload },
/*05*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	  splitstore },
/*06*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested },
/*07*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward },
/*08*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2 },
/*09*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1 },
/*10*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload },
/*11*/	{ "dtlbmissstore", "thresh >= .05",
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore },
/*12*/	{ "frontendstall", "thresh >= .15",
	  "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  frontendstall },
/*13*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears },
/*14*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist },
/*15*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing },
/*16*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists },
/*17*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx },
/*18*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse },
/*19*/	{ "eff1", "thresh < .9",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1 },
/*20*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2 },
};
1398
1399
/*
 * Expression table for Ivy Bridge CPUs (installed by set_ivybridge()).
 * Each entry lists, in order: the expression name matched by
 * set_expression()/validate_expression(), a human-readable threshold hint,
 * the pmcstat command line that collects the needed counters, and the
 * function that evaluates the expression from those counters.
 * IVY_BRIDGE_COUNT must equal the number of entries below.
 */
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/	{ "eff1", "thresh < .75",
	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency1 },
/*2*/	{ "eff2", "thresh > 1.0",
	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  efficiency2 },
/*3*/	{ "itlbmiss", "thresh > .05",
	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  itlb_miss },
/*4*/	{ "icachemiss", "thresh > .05",
	  "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  icache_miss },
/*5*/	{ "lcpstall", "thresh > .05",
	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  lcp_stall },
/*6*/	{ "cache1", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache1ib },
/*7*/	{ "cache2", "thresh >= .2",
	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  cache2ib },
/*8*/	{ "contested", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  contested },
/*9*/	{ "datashare", "thresh >= .05",
	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  datasharing },
/*10*/	{ "blockstorefwd", "thresh >= .05",
	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  blockstoreforward },
/*11*/	{ "splitload", "thresh >= .1",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
	  splitloadib },
/*12*/	{ "splitstore", "thresh >= .01",
	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
	  splitstore },
/*13*/	{ "aliasing_4k", "thresh >= .1",
	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  aliasing },
/*14*/	{ "dtlbmissload", "thresh >= .1",
	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missload },
/*15*/	{ "dtlbmissstore", "thresh >= .05",
	  "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  dtlb_missstore },
/*16*/	{ "br_miss", "thresh >= .2",
	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
	  br_mispredictib },
/*17*/	{ "clears", "thresh >= .02",
	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  clears },
/*18*/	{ "microassist", "thresh >= .05",
	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  microassist },
/*19*/	{ "fpassist", "look for a excessive value",
	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
	  fpassists },
/*20*/	{ "otherassistavx", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistavx },
/*21*/	{ "otherassistsse", "look for a excessive value",
	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
	  otherassistsse },
};
1466
1467#define HASWELL_COUNT 20
1468static struct cpu_entry haswell[HASWELL_COUNT] = {
1469/*1*/	{ "eff1", "thresh < .75",
1470	  "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1471	  efficiency1 },
1472/*2*/	{ "eff2", "thresh > 1.0",
1473	  "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1474	  efficiency2 },
1475/*3*/	{ "itlbmiss", "thresh > .05",
1476	  "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1477	  itlb_miss },
1478/*4*/	{ "icachemiss", "thresh > .05",
1479	  "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
1480	  icache_miss_has },
1481/*5*/	{ "lcpstall", "thresh > .05",
1482	  "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1483	  lcp_stall },
1484/*6*/	{ "cache1", "thresh >= .2",
1485	  "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1486	  cache1ib },
1487/*7*/	{ "cache2", "thresh >= .2",
1488	  "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1489	  cache2has },
1490/*8*/	{ "contested", "thresh >= .05",
1491	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1492	  contested_has },
1493/*9*/	{ "datashare", "thresh >= .05",
1494	  "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1495	  datasharing_has },
1496/*10*/	{ "blockstorefwd", "thresh >= .05",
1497	  "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1498	  blockstoreforward },
1499/*11*/	{ "splitload", "thresh >= .1",
1500	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1501	  splitload },
1502/*12*/	{ "splitstore", "thresh >= .01",
1503	  "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1504	  splitstore },
1505/*13*/	{ "aliasing_4k", "thresh >= .1",
1506	  "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1507	  aliasing },
1508/*14*/	{ "dtlbmissload", "thresh >= .1",
1509	  "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1510	  dtlb_missload },
1511/*15*/	{ "br_miss", "thresh >= .2",
1512	  "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1513	  br_mispredict },
1514/*16*/	{ "clears", "thresh >= .02",
1515	  "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1516	  clears },
1517/*17*/	{ "microassist", "thresh >= .05",
1518	  "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1519	  microassist },
1520/*18*/	{ "fpassist", "look for a excessive value",
1521	  "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1522	  fpassists },
1523/*19*/	{ "otherassistavx", "look for a excessive value",
1524	  "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1525	  otherassistavx },
1526/*20*/	{ "otherassistsse", "look for a excessive value",
1527	  "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1528	  otherassistsse },
1529};
1530
1531
1532static void
1533set_sandybridge(void)
1534{
1535	strcpy(the_cpu.cputype, "SandyBridge PMC");
1536	the_cpu.number = SANDY_BRIDGE_COUNT;
1537	the_cpu.ents = sandy_bridge;
1538	the_cpu.explain = explain_name_sb;
1539}
1540
1541static void
1542set_ivybridge(void)
1543{
1544	strcpy(the_cpu.cputype, "IvyBridge PMC");
1545	the_cpu.number = IVY_BRIDGE_COUNT;
1546	the_cpu.ents = ivy_bridge;
1547	the_cpu.explain = explain_name_ib;
1548}
1549
1550
1551static void
1552set_haswell(void)
1553{
1554	strcpy(the_cpu.cputype, "HASWELL PMC");
1555	the_cpu.number = HASWELL_COUNT;
1556	the_cpu.ents = haswell;
1557	the_cpu.explain = explain_name_has;
1558}
1559
1560static void
1561set_expression(char *name)
1562{
1563	int found = 0, i;
1564	for(i=0 ; i< the_cpu.number; i++) {
1565		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1566			found = 1;
1567			expression = the_cpu.ents[i].func;
1568			command = the_cpu.ents[i].command;
1569			threshold = the_cpu.ents[i].thresh;
1570			break;
1571		}
1572	}
1573	if (!found) {
1574		printf("For CPU type %s we have no expression:%s\n",
1575		       the_cpu.cputype, name);
1576		exit(-1);
1577	}
1578}
1579
1580
1581
1582
1583
1584static int
1585validate_expression(char *name)
1586{
1587	int i, found;
1588
1589	found = 0;
1590	for(i=0 ; i< the_cpu.number; i++) {
1591		if (strcmp(name, the_cpu.ents[i].name) == 0) {
1592			found = 1;
1593			break;
1594		}
1595	}
1596	if (!found) {
1597		return(-1);
1598	}
1599	return (0);
1600}
1601
1602static void
1603do_expression(struct counters *cpu, int pos)
1604{
1605	if (expression == NULL)
1606		return;
1607	(*expression)(cpu, pos);
1608}
1609
1610static void
1611process_header(int idx, char *p)
1612{
1613	struct counters *up;
1614	int i, len, nlen;
1615	/*
1616	 * Given header element idx, at p in
1617	 * form 's/NN/nameof'
1618	 * process the entry to pull out the name and
1619	 * the CPU number.
1620	 */
1621	if (strncmp(p, "s/", 2)) {
1622		printf("Check -- invalid header no s/ in %s\n",
1623		       p);
1624		return;
1625	}
1626	up = &cnts[idx];
1627	up->cpu = strtol(&p[2], NULL, 10);
1628	len = strlen(p);
1629	for (i=2; i<len; i++) {
1630		if (p[i] == '/') {
1631			nlen = strlen(&p[(i+1)]);
1632			if (nlen < (MAX_NLEN-1)) {
1633				strcpy(up->counter_name, &p[(i+1)]);
1634			} else {
1635				strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
1636			}
1637		}
1638	}
1639}
1640
1641static void
1642build_counters_from_header(FILE *io)
1643{
1644	char buffer[8192], *p;
1645	int i, len, cnt;
1646	size_t mlen;
1647
1648	/* We have a new start, lets
1649	 * setup our headers and cpus.
1650	 */
1651	if (fgets(buffer, sizeof(buffer), io) == NULL) {
1652		printf("First line can't be read from file err:%d\n", errno);
1653		return;
1654	}
1655	/*
1656	 * Ok output is an array of counters. Once
1657	 * we start to read the values in we must
1658	 * put them in there slot to match there CPU and
1659	 * counter being updated. We create a mass array
1660	 * of the counters, filling in the CPU and
1661	 * counter name.
1662	 */
1663	/* How many do we get? */
1664	len = strlen(buffer);
1665	for (i=0, cnt=0; i<len; i++) {
1666		if (strncmp(&buffer[i], "s/", 2) == 0) {
1667			cnt++;
1668			for(;i<len;i++) {
1669				if (buffer[i] == ' ')
1670					break;
1671			}
1672		}
1673	}
1674	mlen = sizeof(struct counters) * cnt;
1675	cnts = malloc(mlen);
1676	ncnts = cnt;
1677	if (cnts == NULL) {
1678		printf("No memory err:%d\n", errno);
1679		return;
1680	}
1681	memset(cnts, 0, mlen);
1682	for (i=0, cnt=0; i<len; i++) {
1683		if (strncmp(&buffer[i], "s/", 2) == 0) {
1684			p = &buffer[i];
1685			for(;i<len;i++) {
1686				if (buffer[i] == ' ') {
1687					buffer[i] = 0;
1688					break;
1689				}
1690			}
1691			process_header(cnt, p);
1692			cnt++;
1693		}
1694	}
1695	if (verbose)
1696		printf("We have %d entries\n", cnt);
1697}
/*
 * Upper bound on the number of data lines process_file() reads before it
 * stops; defaults to MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
1700
1701static int
1702read_a_line(FILE *io)
1703{
1704	char buffer[8192], *p, *stop;
1705	int pos, i;
1706
1707	if (fgets(buffer, sizeof(buffer), io) == NULL) {
1708		return(0);
1709	}
1710	p = buffer;
1711	for (i=0; i<ncnts; i++) {
1712		pos = cnts[i].pos;
1713		cnts[i].vals[pos] = strtol(p, &stop, 0);
1714		cnts[i].pos++;
1715		cnts[i].sum += cnts[i].vals[pos];
1716		p = stop;
1717	}
1718	return (1);
1719}
1720
/*
 * Number of CPUs that have counters; set by print_header() and used by
 * process_file() to know when a row of per-CPU results is complete.
 */
extern int cpu_count_out;
int cpu_count_out=0;
1723
1724static void
1725print_header(void)
1726{
1727	int i, cnt, printed_cnt;
1728
1729	printf("*********************************\n");
1730	for(i=0, cnt=0; i<MAX_CPU; i++) {
1731		if (glob_cpu[i]) {
1732			cnt++;
1733		}
1734	}
1735	cpu_count_out = cnt;
1736	for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
1737		if (glob_cpu[i]) {
1738			printf("CPU%d", i);
1739			printed_cnt++;
1740		}
1741		if (printed_cnt == cnt) {
1742			printf("\n");
1743			break;
1744		} else {
1745			printf("\t");
1746		}
1747	}
1748}
1749
1750static void
1751lace_cpus_together(void)
1752{
1753	int i, j, lace_cpu;
1754	struct counters *cpat, *at;
1755
1756	for(i=0; i<ncnts; i++) {
1757		cpat = &cnts[i];
1758		if (cpat->next_cpu) {
1759			/* Already laced in */
1760			continue;
1761		}
1762		lace_cpu = cpat->cpu;
1763		if (lace_cpu >= MAX_CPU) {
1764			printf("CPU %d to big\n", lace_cpu);
1765			continue;
1766		}
1767		if (glob_cpu[lace_cpu] == NULL) {
1768			glob_cpu[lace_cpu] = cpat;
1769		} else {
1770			/* Already processed this cpu */
1771			continue;
1772		}
1773		/* Ok look forward for cpu->cpu and link in */
1774		for(j=(i+1); j<ncnts; j++) {
1775			at = &cnts[j];
1776			if (at->next_cpu) {
1777				continue;
1778			}
1779			if (at->cpu == lace_cpu) {
1780				/* Found one */
1781				cpat->next_cpu = at;
1782				cpat = at;
1783			}
1784		}
1785	}
1786}
1787
1788
1789static void
1790process_file(char *filename)
1791{
1792	FILE *io;
1793	int i;
1794	int line_at, not_done;
1795	pid_t pid_of_command=0;
1796
1797	if (filename ==  NULL) {
1798		io = my_popen(command, "r", &pid_of_command);
1799	} else {
1800		io = fopen(filename, "r");
1801		if (io == NULL) {
1802			printf("Can't process file %s err:%d\n",
1803			       filename, errno);
1804			return;
1805		}
1806	}
1807	build_counters_from_header(io);
1808	if (cnts == NULL) {
1809		/* Nothing we can do */
1810		printf("Nothing to do -- no counters built\n");
1811		if (io) {
1812			fclose(io);
1813		}
1814		return;
1815	}
1816	lace_cpus_together();
1817	print_header();
1818	if (verbose) {
1819		for (i=0; i<ncnts; i++) {
1820			printf("Counter:%s cpu:%d index:%d\n",
1821			       cnts[i].counter_name,
1822			       cnts[i].cpu, i);
1823		}
1824	}
1825	line_at = 0;
1826	not_done = 1;
1827	while(not_done) {
1828		if (read_a_line(io)) {
1829			line_at++;
1830		} else {
1831			break;
1832		}
1833		if (line_at >= max_to_collect) {
1834			not_done = 0;
1835		}
1836		if (filename == NULL) {
1837			int cnt;
1838			/* For the ones we dynamically open we print now */
1839			for(i=0, cnt=0; i<MAX_CPU; i++) {
1840				do_expression(glob_cpu[i], (line_at-1));
1841				cnt++;
1842				if (cnt == cpu_count_out) {
1843					printf("\n");
1844					break;
1845				} else {
1846					printf("\t");
1847				}
1848			}
1849		}
1850	}
1851	if (filename) {
1852		fclose(io);
1853	} else {
1854		my_pclose(io, pid_of_command);
1855	}
1856}
/*
 * cpuid(in, a, b, c, d): on amd64, execute the CPUID instruction with "in"
 * loaded into EAX and store the resulting EAX/EBX/ECX/EDX into a/b/c/d.
 * The amd64 expansion already ends with a ';'.  On any other architecture
 * the macro expands to nothing, so the output variables keep whatever
 * values the caller gave them.
 */
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
#else
#define cpuid(in, a, b, c, d)
#endif
1863
1864static void
1865get_cpuid_set(void)
1866{
1867	unsigned long eax, ebx, ecx, edx;
1868	int model;
1869	pid_t pid_of_command=0;
1870	size_t sz, len;
1871	FILE *io;
1872	char linebuf[1024], *str;
1873
1874	eax = ebx = ecx = edx = 0;
1875
1876	cpuid(0, eax, ebx, ecx, edx);
1877	if (ebx == 0x68747541) {
1878		printf("AMD processors are not supported by this program\n");
1879		printf("Sorry\n");
1880		exit(0);
1881	} else if (ebx == 0x6972794) {
1882		printf("Cyrix processors are not supported by this program\n");
1883		printf("Sorry\n");
1884		exit(0);
1885	} else if (ebx == 0x756e6547) {
1886		printf("Genuine Intel\n");
1887	} else {
1888		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
1889		exit(0);
1890	}
1891	cpuid(1, eax, ebx, ecx, edx);
1892	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
1893	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
1894	switch (eax & 0xF00) {
1895	case 0x500:		/* Pentium family processors */
1896		printf("Intel Pentium P5\n");
1897		goto not_supported;
1898		break;
1899	case 0x600:		/* Pentium Pro, Celeron, Pentium II & III */
1900		switch (model) {
1901		case 0x1:
1902			printf("Intel Pentium P6\n");
1903			goto not_supported;
1904			break;
1905		case 0x3:
1906		case 0x5:
1907			printf("Intel PII\n");
1908			goto not_supported;
1909			break;
1910		case 0x6: case 0x16:
1911			printf("Intel CL\n");
1912			goto not_supported;
1913			break;
1914		case 0x7: case 0x8: case 0xA: case 0xB:
1915			printf("Intel PIII\n");
1916			goto not_supported;
1917			break;
1918		case 0x9: case 0xD:
1919			printf("Intel PM\n");
1920			goto not_supported;
1921			break;
1922		case 0xE:
1923			printf("Intel CORE\n");
1924			goto not_supported;
1925			break;
1926		case 0xF:
1927			printf("Intel CORE2\n");
1928			goto not_supported;
1929			break;
1930		case 0x17:
1931			printf("Intel CORE2EXTREME\n");
1932			goto not_supported;
1933			break;
1934		case 0x1C:	/* Per Intel document 320047-002. */
1935			printf("Intel ATOM\n");
1936			goto not_supported;
1937			break;
1938		case 0x1A:
1939		case 0x1E:	/*
1940				 * Per Intel document 253669-032 9/2009,
1941				 * pages A-2 and A-57
1942				 */
1943		case 0x1F:	/*
1944				 * Per Intel document 253669-032 9/2009,
1945				 * pages A-2 and A-57
1946				 */
1947			printf("Intel COREI7\n");
1948			goto not_supported;
1949			break;
1950		case 0x2E:
1951			printf("Intel NEHALEM\n");
1952			goto not_supported;
1953			break;
1954		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
1955		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
1956			printf("Intel WESTMERE\n");
1957			goto not_supported;
1958			break;
1959		case 0x2F:	/* Westmere-EX, seen in wild */
1960			printf("Intel WESTMERE\n");
1961			goto not_supported;
1962			break;
1963		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
1964			printf("Intel SANDYBRIDGE\n");
1965			set_sandybridge();
1966			break;
1967		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
1968			printf("Intel SANDYBRIDGE_XEON\n");
1969			set_sandybridge();
1970			break;
1971		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
1972			printf("Intel IVYBRIDGE\n");
1973			set_ivybridge();
1974			break;
1975		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
1976			printf("Intel IVYBRIDGE_XEON\n");
1977			set_ivybridge();
1978			break;
1979		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
1980			printf("Intel HASWELL (Xeon)\n");
1981			set_haswell();
1982			break;
1983		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
1984		case 0x45:
1985		case 0x46:
1986			printf("Intel HASWELL\n");
1987			set_haswell();
1988			break;
1989		case 0x4D:
1990			/* Per Intel document 330061-001 01/2014. */
1991			printf("Intel ATOM_SILVERMONT\n");
1992			goto not_supported;
1993			break;
1994		default:
1995			printf("Intel model 0x%x is not known -- sorry\n",
1996			       model);
1997			goto not_supported;
1998			break;
1999		}
2000		break;
2001	case 0xF00:		/* P4 */
2002		printf("Intel unknown model %d\n", model);
2003		goto not_supported;
2004		break;
2005	}
2006	/* Ok lets load the list of all known PMC's */
2007	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2008	if (valid_pmcs == NULL) {
2009		/* Likely */
2010		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2011		sz = sizeof(char *) * pmc_allocated_cnt;
2012		valid_pmcs = malloc(sz);
2013		if (valid_pmcs == NULL) {
2014			printf("No memory allocation fails at startup?\n");
2015			exit(-1);
2016		}
2017		memset(valid_pmcs, 0, sz);
2018	}
2019
2020	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2021		if (linebuf[0] != '\t') {
2022			/* sometimes headers ;-) */
2023			continue;
2024		}
2025		len = strlen(linebuf);
2026		if (linebuf[(len-1)] == '\n') {
2027			/* Likely */
2028			linebuf[(len-1)] = 0;
2029		}
2030		str = &linebuf[1];
2031		len = strlen(str) + 1;
2032		valid_pmcs[valid_pmc_cnt] = malloc(len);
2033		if (valid_pmcs[valid_pmc_cnt] == NULL) {
2034			printf("No memory2 allocation fails at startup?\n");
2035			exit(-1);
2036		}
2037		memset(valid_pmcs[valid_pmc_cnt], 0, len);
2038		strcpy(valid_pmcs[valid_pmc_cnt], str);
2039		valid_pmc_cnt++;
2040		if (valid_pmc_cnt >= pmc_allocated_cnt) {
2041			/* Got to expand -- unlikely */
2042			char **more;
2043
2044			sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2045			more = malloc(sz);
2046			if (more == NULL) {
2047				printf("No memory3 allocation fails at startup?\n");
2048				exit(-1);
2049			}
2050			memset(more, 0, sz);
2051			memcpy(more, valid_pmcs, sz);
2052			pmc_allocated_cnt *= 2;
2053			free(valid_pmcs);
2054			valid_pmcs = more;
2055		}
2056	}
2057	my_pclose(io, pid_of_command);
2058	return;
2059not_supported:
2060	printf("Not supported\n");
2061	exit(-1);
2062}
2063
2064static void
2065explain_all(void)
2066{
2067	int i;
2068	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2069	printf("-------------------------------------------------------------\n");
2070	for(i=0; i<the_cpu.number; i++){
2071		printf("For -e %s ", the_cpu.ents[i].name);
2072		(*the_cpu.explain)(the_cpu.ents[i].name);
2073		printf("----------------------------\n");
2074	}
2075}
2076
/*
 * Run pmcstat against a single PMC and print a Pass/Failed verdict.
 *
 * pmc        - name of the PMC to test; handed to pmcstat via -s.
 * out_so_far - number of characters the caller has already printed on
 *              the current output line; the verdict is padded out to
 *              column 50.
 *
 * The first line of output is scanned (starting at offset 2) for either
 * the string "ERROR" or the PMC name.  When the name is found the second
 * line is read, its leading blanks are dropped, and what is left is
 * printed after "Pass" padded to a 20 character field.  Every other
 * outcome is reported as "Failed".
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, rc;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: an over-long PMC name must not smash the stack */
	rc = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (rc < 0 || (size_t)rc >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to see is the PMC name itself */
	len = (int)strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = (int)strlen(line);
	if ((llen > 0) && (line[(llen-1)] == '\n')) {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip leading blanks one character at a time; the
			 * terminating NUL stops the scan so j never leaves
			 * the string.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = (int)strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);

}
2149
/*
 * Append a private copy of name to the string table vars unless an
 * identical string is already present in the first cur_cnt slots.
 *
 * Returns 1 when a new entry was stored in vars[cur_cnt] and 0 when the
 * name was already there.  The process exits if the target slot is
 * unexpectedly occupied or if memory cannot be obtained.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t nbytes;
	int idx;

	idx = 0;
	while (idx < cur_cnt) {
		if (strcmp(vars[idx], name) == 0) {
			/* Duplicate, nothing to add */
			return(0);
		}
		idx++;
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* First time we see this one, store a copy including the NUL */
	nbytes = strlen(name) + 1;
	copy = malloc(nbytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, nbytes);
	vars[cur_cnt] = copy;
	return(1);
}
2177
2178static char *
2179build_command_for_exp(struct expression *exp)
2180{
2181	/*
2182	 * Build the pmcstat command to handle
2183	 * the passed in expression.
2184	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2185	 * where NNN and QQQ represent the PMC's in the expression
2186	 * uniquely..
2187	 */
2188	char forming[1024];
2189	int cnt_pmc, alloced_pmcs, i;
2190	struct expression *at;
2191	char **vars, *cmd;
2192	size_t mal;
2193
2194	alloced_pmcs = cnt_pmc = 0;
2195	/* first how many do we have */
2196	at = exp;
2197	while (at) {
2198		if (at->type == TYPE_VALUE_PMC) {
2199			cnt_pmc++;
2200		}
2201		at = at->next;
2202	}
2203	if (cnt_pmc == 0) {
2204		printf("No PMC's in your expression -- nothing to do!!\n");
2205		exit(0);
2206	}
2207	mal = cnt_pmc * sizeof(char *);
2208	vars = malloc(mal);
2209	if (vars == NULL) {
2210		printf("No memory\n");
2211		exit(-1);
2212	}
2213	memset(vars, 0, mal);
2214	at = exp;
2215	while (at) {
2216		if (at->type == TYPE_VALUE_PMC) {
2217			if(add_it_to(vars, alloced_pmcs, at->name)) {
2218				alloced_pmcs++;
2219			}
2220		}
2221		at = at->next;
2222	}
2223	/* Now we have a unique list in vars so create our command */
2224	mal = 23; /*	"/usr/sbin/pmcstat -w 1"  + \0 */
2225	for(i=0; i<alloced_pmcs; i++) {
2226		mal += strlen(vars[i]) + 4;	/* var + " -s " */
2227	}
2228	cmd = malloc((mal+2));
2229	if (cmd == NULL) {
2230		printf("%s out of mem\n", __FUNCTION__);
2231		exit(-1);
2232	}
2233	memset(cmd, 0, (mal+2));
2234	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2235	at = exp;
2236	for(i=0; i<alloced_pmcs; i++) {
2237		sprintf(forming, " -s %s", vars[i]);
2238		strcat(cmd, forming);
2239		free(vars[i]);
2240		vars[i] = NULL;
2241	}
2242	free(vars);
2243	return(cmd);
2244}
2245
2246static int
2247user_expr(struct counters *cpu, int pos)
2248{
2249	int ret;
2250	double res;
2251	struct counters *var;
2252	struct expression *at;
2253
2254	at = master_exp;
2255	while (at) {
2256		if (at->type == TYPE_VALUE_PMC) {
2257			var = find_counter(cpu, at->name);
2258			if (var == NULL) {
2259				printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2260				exit(-1);
2261			}
2262			if (pos != -1) {
2263				at->value = var->vals[pos] * 1.0;
2264			} else {
2265				at->value = var->sum * 1.0;
2266			}
2267		}
2268		at = at->next;
2269	}
2270	res = run_expr(master_exp, 1, NULL);
2271	ret = printf("%1.3f", res);
2272	return(ret);
2273}
2274
2275
2276static void
2277set_manual_exp(struct expression *exp)
2278{
2279	expression = user_expr;
2280	command = build_command_for_exp(exp);
2281	threshold = "User defined threshold";
2282}
2283
2284static void
2285run_tests(void)
2286{
2287	int i, lenout;
2288	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2289	printf("------------------------------------------------------------------------\n");
2290	for(i=0; i<valid_pmc_cnt; i++) {
2291		lenout = printf("%s", valid_pmcs[i]);
2292		fflush(stdout);
2293		test_for_a_pmc(valid_pmcs[i], lenout);
2294	}
2295}
2296static void
2297list_all(void)
2298{
2299	int i, cnt, j;
2300	printf("PMC                                               Abbreviation\n");
2301	printf("--------------------------------------------------------------\n");
2302	for(i=0; i<valid_pmc_cnt; i++) {
2303		cnt = printf("%s", valid_pmcs[i]);
2304		for(j=cnt; j<52; j++) {
2305			printf(" ");
2306		}
2307		printf("%%%d\n", i);
2308	}
2309}
2310
2311
2312int
2313main(int argc, char **argv)
2314{
2315	int i, j, cnt;
2316	char *filename=NULL;
2317	char *name=NULL;
2318	int help_only = 0;
2319	int test_mode = 0;
2320
2321	get_cpuid_set();
2322	memset(glob_cpu, 0, sizeof(glob_cpu));
2323	while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
2324		switch (i) {
2325		case 'L':
2326			list_all();
2327			return(0);
2328		case 'H':
2329			printf("**********************************\n");
2330			explain_all();
2331			printf("**********************************\n");
2332			return(0);
2333			break;
2334		case 'T':
2335			test_mode = 1;
2336			break;
2337		case 'E':
2338			master_exp = parse_expression(optarg);
2339			if (master_exp) {
2340				set_manual_exp(master_exp);
2341			}
2342			break;
2343		case 'e':
2344			if (validate_expression(optarg)) {
2345				printf("Unknown expression %s\n", optarg);
2346				return(0);
2347			}
2348			name = optarg;
2349			set_expression(optarg);
2350			break;
2351		case 'm':
2352			max_to_collect = strtol(optarg, NULL, 0);
2353			if (max_to_collect > MAX_COUNTER_SLOTS) {
2354				/* You can't collect more than max in array */
2355				max_to_collect = MAX_COUNTER_SLOTS;
2356			}
2357			break;
2358		case 'v':
2359			verbose++;
2360			break;
2361		case 'h':
2362			help_only = 1;
2363			break;
2364		case 'i':
2365			filename = optarg;
2366			break;
2367		case '?':
2368		default:
2369		use:
2370			printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2371			       argv[0]);
2372			printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2373			printf("-v -- verbose dump debug type things -- you don't want this\n");
2374			printf("-m N -- maximum to collect is N measurments\n");
2375			printf("-e expr-name -- Do expression expr-name\n");
2376			printf("-E 'your expression' -- Do your expression\n");
2377			printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2378			printf("-H -- Don't run anything, just explain all canned expressions\n");
2379			printf("-T -- Test all PMC's defined by this processor\n");
2380			return(0);
2381			break;
2382		};
2383	}
2384	if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
2385		printf("Without setting an expression we cannot dynamically gather information\n");
2386		printf("you must supply a filename (and you probably want verbosity)\n");
2387		goto use;
2388	}
2389	if (test_mode) {
2390		run_tests();
2391		return(0);
2392	}
2393	printf("*********************************\n");
2394	if (master_exp == NULL) {
2395		(*the_cpu.explain)(name);
2396	} else {
2397		printf("Examine your expression ");
2398		print_exp(master_exp);
2399		printf("User defined threshold\n");
2400	}
2401	if (help_only) {
2402		return(0);
2403	}
2404	process_file(filename);
2405	if (verbose >= 2) {
2406		for (i=0; i<ncnts; i++) {
2407			printf("Counter:%s cpu:%d index:%d\n",
2408			       cnts[i].counter_name,
2409			       cnts[i].cpu, i);
2410			for(j=0; j<cnts[i].pos; j++) {
2411				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2412			}
2413			printf(" sum - %ld\n", (long int)cnts[i].sum);
2414		}
2415	}
2416	if (expression == NULL) {
2417		return(0);
2418	}
2419	for(i=0, cnt=0; i<MAX_CPU; i++) {
2420		if (glob_cpu[i]) {
2421			do_expression(glob_cpu[i], -1);
2422			cnt++;
2423			if (cnt == cpu_count_out) {
2424				printf("\n");
2425				break;
2426			} else {
2427				printf("\t");
2428			}
2429		}
2430	}
2431	return(0);
2432}
2433