1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/time_impl.h>
29#include <sys/wait.h>
30#include <stdio.h>
31#include <stdio_ext.h>
32#include <stdlib.h>
33#include <stdarg.h>
34#include <ctype.h>
35#include <time.h>
36#include <fcntl.h>
37#include <sys/stat.h>
38#include <sys/resource.h>
39#include <limits.h>
40#include <string.h>
41#include <unistd.h>
42#include <errno.h>
43#include <signal.h>
44#include <libdevinfo.h>
45#define	_KERNEL
46#include <sys/dditypes.h>
47#include <sys/sunddi.h>
48#include <sys/bofi.h>
49
50#define	BOFI_DEV	"/devices/pseudo/bofi@0:bofi,ctl"
51
/*
 * Allocate room for an array of num objects of type s, aligned on a
 * pointer-sized boundary. The expansion is memalign()'s return value cast
 * to (s *); callers are responsible for checking it.
 */
#define	GETSTRUCT(s, num)	\
	((s *) memalign(sizeof (void*), (num) * sizeof (s)))
54
#define	MAXEDEFS	(0x64)		/* controls max no of concurrent edefs */
56#define	DFLTLOGSZ	(0x4000)	/* default size of an access log */
57#define	DFLT_NONPIO_LOGSZ	(0x400)	/* default size of a log */
58#define	MAXALRMCALL	(0x1000ull)	/* alarm does not permit big values */
59#define	MIN_REPORT_TIME	(5)		/* min time to wait for edef status */
60#define	DISTRIB_CUTOFF	(3)		/* useful when reducing a log */
61#define	myLLMAX		(0x7fffffffffffffffll)
62#define	myULLMAX	(0xffffffffffffffffull)
63
64/*
65 * default interval to wait between kicking off workload and injecting fault
66 */
67#define	DEFAULT_EDEF_SLEEP 3
68/*
69 * when generating dma corruptions, it is best to corrupt each double word
70 * individually for control areas - however for data areas this can be
71 * excessive and would generate so many cases we would never finish the run.
72 * So set a cut-off value where we switch from corrupting each double word
 * separately to corrupting the lot in one go. 0x100 bytes seems a good value
74 * on the drivers we have seen so far.
75 */
76#define	DMA_INDIVIDUAL_CORRUPT_CUTOFF 0x100
77
/*
 * State kept for one access-log collector: the errdef describing what to
 * log, the most recent state of that errdef, and the logged accesses.
 */
struct collector_def {
	struct bofi_errdef ed;		/* definition of the log criteria */
	struct bofi_errstate es;	/* the current status of the log */
	struct acc_log_elem *lp;	/* array of logged accesses */
	/* NOTE(review): presumably the process servicing this log - confirm */
	pid_t pid;
};
84
/*
 * Bitmask of the *POLICY flags below that controls how a log is converted
 * into errdefs.
 */
static uint16_t policy;

/*
 * Policy bits. The values correspond one-to-one with the codes in the
 * ptypes[] table. The low four bits (BYTEPOLICY) are tested directly against
 * an access's transfer size when a log is reduced.
 */
#define	BYTEPOLICY	(0xf)
#define	MULTIPOLICY	(0x10)
#define	SIZEPOLICY	(BYTEPOLICY|MULTIPOLICY)
#define	UNBIASEDPOLICY	0x20
#define	UNCOMMONPOLICY	0x40
#define	COMMONPOLICY	0x80
#define	MEDIANPOLICY	0x100
#define	MAXIMALPOLICY	0x200
#define	OPERATORSPOLICY	0x400
#define	VALIDPOLICY	(0x7ff)		/* union of all the policy bits above */
97
/*
 * One entry of a name -> code lookup table. The tables built from this type
 * are terminated by an entry whose str is 0.
 */
typedef
struct coding {
	char	*str;	/* textual name of the code */
	uint_t	code;	/* numeric value the name stands for */
} coding_t;
103
/* policy names and the policy bit each one selects */
static coding_t ptypes[] = {
	{"onebyte", 0x1}, {"twobyte", 0x2},
	{"fourbyte", 0x4}, {"eightbyte", 0x8},
	{"multibyte", 0x10}, {"unbiased", 0x20}, {"uncommon", 0x40},
	{"common", 0x80}, {"median", 0x100}, {"maximal", 0x200},
	{"operators", 0x400},  {0, 0}
};
/* access type names; each name is accepted in lower and in upper case */
static coding_t atypes[] = {
	{"pio_r", BOFI_PIO_R}, {"pio_w", BOFI_PIO_W},
	{"dma_r", BOFI_DMA_R}, {"dma_w", BOFI_DMA_W},
	{"pio", BOFI_PIO_RW}, {"dma", BOFI_DMA_RW},
	{"log", BOFI_LOG}, {"intr", BOFI_INTR},
	{"PIO_R", BOFI_PIO_R}, {"PIO_W", BOFI_PIO_W},
	{"DMA_R", BOFI_DMA_R}, {"DMA_W", BOFI_DMA_W},
	{"PIO", BOFI_PIO_RW}, {"DMA", BOFI_DMA_RW},
	{"LOG", BOFI_LOG}, {"INTR", BOFI_INTR}, {0, 0}
};
/*
 * every operator name; code elsewhere in this file relies on entry 3
 * being "XOR"
 */
static coding_t optypes[] = {
	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
	{"XOR", BOFI_XOR}, {"NO", BOFI_NO_TRANSFER},
	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
};
/* the operators cycled through for data (non-interrupt) accesses */
static coding_t doptypes[] = {
	{"EQ", BOFI_EQUAL}, {"AND", BOFI_AND}, {"OR", BOFI_OR},
	{"XOR", BOFI_XOR}, {0, 0}
};
/* the operators cycled through for interrupt accesses */
static coding_t ioptypes[] = {
	{"DELAY", BOFI_DELAY_INTR}, {"LOSE", BOFI_LOSE_INTR},
	{"EXTRA", BOFI_EXTRA_INTR}, {0, 0}
};
135
/* -1 converted to unsigned long long, i.e. the largest representable value */
static const unsigned long long	DFLTLOGTIME	= -1ull; /* log forever */

/*
 * This global controls the generation of errdefs for PIO_W. The default should
 * be to only perform an access check errdef but not to corrupt writes - this
 * may trash non-FT platforms.
 */
static uint_t atype_is_default;	/* do not corrupt PIO_W by default */
static uint_t lsize_is_default;	/* set when the user has not given a size */

/*
 * Operand used for the next generated "EQ" errdef; it is replaced with a
 * fresh lrand48()-derived value where the errdef generators handle EQ.
 */
static uint64_t random_operand = 0xdeadbeafdeadbeafull;
#define	NPIO_DEFAULTS	(3)	/* number of default corruption values */
/*
 * Fixed operands: every generated "EQ" errdef is followed by one further
 * errdef per entry of this table.
 */
static longlong_t pio_default_values[NPIO_DEFAULTS] = {
	0x0ull,			/* corresponds to a line going high/low */
	0x32f1f03232f1f032ull,	/* the value returned when the fake ta is set */
	(longlong_t)(~0)	/* corresponds to a line going high/low */
};
153
/* debug level: msg() and dump_log() only act when their lvl is <= dbglvl */
static uint_t dbglvl		= 0;	/* debug this program */
/*
 * Flags set asynchronously by kill_sighandler(): alarmed on SIGALRM, killed
 * on any other signal the handler is installed for. They are written from
 * signal context, so they must be volatile sig_atomic_t for the stores to be
 * well defined and for the main flow to observe them.
 */
static volatile sig_atomic_t alarmed	= 0;
static volatile sig_atomic_t killed	= 0;
157
/*
 * name of a script to call before offlining a driver being tested
 */
static char **fixup_script = 0;
static int	scriptargs = 0;
/* NOTE(review): presumably saved copies of main()'s argv/argc - confirm */
static char **pargv;
static int	pargc;

/* upper bound on the wait time written into generated errdefs (0 = unset) */
static int	max_edef_wait = 0;
/* interval written into generated errdefs; 0 selects DEFAULT_EDEF_SLEEP */
static int	edef_sleep = 0;
static int	do_status = 0;	/* report edef status in parsable format */
static char *user_comment = 0;

static char *Progname;
static FILE *errfile;	/* stream that msg() writes diagnostics to */
static FILE *outfile;	/* fallback stream when a script file cannot be opened */
174
175/*
176 * The th_define utility provides an interface to the bus_ops fault injection
177 * bofi device driver for defining error injection specifications (referred to
178 * as errdefs). An errdef corresponds to a specification of how to corrupt a
179 * device driver's accesses to its hardware. The command line arguments
180 * determine the precise nature of the fault to be injected. If the supplied
181 * arguments define a consistent errdef, the th_define process will store the
182 * errdef with the bofi driver and suspend itself until the criteria given by
183 * the errdef become satisfied (in practice, this will occur when the access
184 * counts go to zero).
185 *
186 * When the resulting errdef is activated using the th_manage(1M) user command
187 * utility, the bofi driver will act upon the errdef by matching the number of
188 * hardware accesses - specified in count, that are of the type specified in
189 * acc_types, made by instance number instance - of the driver whose name is
 * name, (or by the driver instance specified by path) to the register set
191 * (or DMA handle) specified by rnumber, that lie within the range offset to
192 * offset + length from the beginning of the register set or DMA handle. It then
193 * applies operator and operand to the next failcount matching accesses.
194 *
195 * If acc_types includes LOG, th_define runs in automatic test script generation
196 * mode, and a set of test scripts (written in the Korn shell) is created and
197 * placed in a sub-directory of the current directory with the name
198 * driver.test.<id>. A separate, executable script is generated for each access
199 * handle that matches the logging criteria. The log of accesses is placed at
200 * the top of each script as a record of the session. If the current directory
201 * is not writable, file output is written to standard output. The base name of
202 * each test file is the driver name, and the extension is a number that
203 * discriminates between different access handles. A control script (with the
204 * same name as the created test directory) is generated that will run all the
205 * test scripts sequentially.
206 *
207 * Executing the scripts will install, and then activate, the resulting error
208 * definitions. Error definitions are activated sequentially and the driver
209 * instance under test is taken offline and brought back online before each test
210 * (refer to the -e option for more information). By default, logging will apply
211 * to all PIO accesses, interrupts and DMA accesses to and from areas mapped
212 * for both reading and writing, but it can be constrained by specifying
213 * additional acc_types, rnumber, offset and length. Logging will continue for
214 * count matching accesses, with an optional time limit of collect_time seconds.
215 *
216 * Either the -n or -P option must be provided. The other options are optional.
217 * If an option (other than the -a option) is specified multiple times, only
218 * the final value for the option is used. If an option is not specified, its
219 * associated value is set to an appropriate default, which will provide
220 * maximal error coverage as described below.
221 */
222
223/*PRINTFLIKE2*/
224static void
225msg(uint_t lvl, char *msg, ...)
226{
227#define	BUFSZ	128
228
229	if (lvl <= dbglvl) {
230		int count;
231		va_list args;
232		char buf[BUFSZ];
233		int	pos = 0;
234
235		va_start(args, msg);
236		count = vsnprintf(buf, BUFSZ, msg, args);
237		va_end(args);
238		if (count > 0) {
239			count += pos;
240			if (count >= sizeof (buf))
241				count = BUFSZ - 1;
242			buf[count] = '\0';
243			(void) fprintf(errfile, "%s", buf);
244		}
245	}
246}
247
248static void
249kill_sighandler(int sig)
250{
251	switch (sig) {
252		case SIGALRM:
253			alarmed = 1;
254			break;
255		default:
256			killed = 1;
257			break;
258	}
259}
260
261static void
262set_handler(int sig)
263{
264	struct sigaction sa;
265
266	(void) sigfillset(&(sa.sa_mask));
267	sa.sa_flags = 0;
268	sa.sa_handler = kill_sighandler;
269	if (sigaction(sig, &sa, NULL) != 0)
270		/* install handler */
271		msg(0, "bad sigaction: %s\n", strerror(errno));
272}
273
274/*
275 * Compare two driver access handles
276 */
277static int
278hdl_cmp(const void *p1, const void *p2)
279{
280	struct handle_info *e1 = (struct handle_info *)p1;
281	struct handle_info *e2 = (struct handle_info *)p2;
282
283	if (e1->instance < e2->instance)
284		return (-1);
285	else if (e1->instance > e2->instance)
286		return (1);
287	else if (e1->access_type < e2->access_type)
288		return (-1);
289	else if (e1->access_type > e2->access_type)
290		return (1);
291	else if (e1->rnumber < e2->rnumber)
292		return (-1);
293	else if (e1->rnumber > e2->rnumber)
294		return (1);
295	else if (e1->len < e2->len)
296		return (-1);
297	else if (e1->len > e2->len)
298		return (1);
299	else if (e1->offset < e2->offset)
300		return (-1);
301	else if (e1->offset > e2->offset)
302		return (1);
303	else if (e1->addr_cookie < e2->addr_cookie)
304		return (-1);
305	else if (e1->addr_cookie > e2->addr_cookie)
306		return (1);
307	else
308		return (0);
309}
310
311/*
312 * Compare two hardware accesses.
313 */
314static int
315elem_cmp(const void *p1, const void *p2)
316{
317	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
318	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
319
320	if (e1->access_type < e2->access_type)
321		return (-1);
322	else if (e1->access_type > e2->access_type)
323		return (1);
324	else if (e1->offset < e2->offset)
325		return (-1);
326	else if (e1->offset > e2->offset)
327		return (1);
328	else if (e1->size < e2->size)
329		return (-1);
330	else if (e1->size > e2->size)
331		return (1);
332	else
333		return (0);
334}
335
336/*
337 * Another way of comparing two hardware accesses.
338 */
339static int
340log_cmp(const void *p1, const void *p2)
341{
342	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
343	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
344
345	int rval = elem_cmp(p1, p2);
346
347	if (rval == 0)
348		if (e1->repcount < e2->repcount)
349			return (-1);
350		else if (e1->repcount > e2->repcount)
351			return (1);
352		else
353			return (0);
354	else
355		return (rval);
356}
357
358/*
359 * And a final way of sorting a log (by access type followed by repcount).
360 */
361static int
362log_cmp2(const void *p1, const void *p2)
363{
364	struct acc_log_elem *e1 = (struct acc_log_elem *)p1;
365	struct acc_log_elem *e2 = (struct acc_log_elem *)p2;
366
367	if (e1->access_type < e2->access_type)
368		return (-1);
369	else if (e1->access_type > e2->access_type)
370		return (1);
371	else if (e1->repcount < e2->repcount)
372		return (-1);
373	else if (e1->repcount > e2->repcount)
374		return (1);
375	else
376		return (0);
377}
378
379static void
380dump_log(uint_t lvl, FILE *fp, struct acc_log_elem *items,
381	size_t nitems, uint_t logflags)
382{
383	if (lvl <= dbglvl) {
384		int i;
385		uint_t offset, allthesame = 1;
386
387		if (logflags & BOFI_LOG_TIMESTAMP &&
388		    getenv("DUMP_FULL_LOG") != 0)
389			allthesame = 0;
390		else
391			for (i = 1; i < nitems; i++)
392				if (elem_cmp(items+i, items) != 0)
393					allthesame = 0;
394		if (fp != 0)
395			(void) fprintf(fp,
396			    "# Logged Accesses:\n# %-4s\t%-12s\t%-4s\t%-18s"
397			    " (%-1s)\t%-10s\n\n", "type",
398			    (items->access_type & BOFI_DMA_RW) ?
399			    "address" : "offset",
400			    "size", "value", "repcnt", "time");
401
402		for (i = 0; i < nitems; i++, items++) {
403			offset = items->offset;
404			if (fp != 0) {
405				(void) fprintf(fp,
406				    "# 0x%-2x\t0x%-10x\t%-4d\t0x%-16llx"
407				    " (0x%-1x)\t%-8llu\n",
408				    items->access_type, offset, items->size,
409				    items->value, items->repcount,
410				    (logflags & BOFI_LOG_TIMESTAMP) ?
411				    items->access_time : 0ull);
412
413				if (allthesame) {
414					(void) fprintf(fp,
415					    "# Access duplicated %d times\n",
416					    nitems);
417					break;
418				}
419			} else
420				msg(lvl, "# 0x%x 0x%x %d 0x%llx(0x%x) %llu\n",
421				    items->access_type, offset, items->size,
422				    items->value, items->repcount,
423				    (logflags & BOFI_LOG_TIMESTAMP) ?
424				    items->access_time : 0ull);
425		}
426	}
427}
428
429static int
430str_to_bm(char *optarg, coding_t *c, uint_t *bm)
431{
432	char *str;
433	char *s = "\t\n ";
434	int err = EINVAL;
435
436	msg(2, "str_to_bm: optarg %s\n", optarg);
437	if (optarg != NULL && (str = strtok(optarg, s))) {
438		msg(2, "str_to_bm: str %s\n", str);
439		do {
440			for (; c->str != 0; c++)
441				if (strcmp(str, c->str) == 0) {
442					*bm |= c->code;
443					msg(2, "str_to_bm: %s matches\n",
444					    c->str);
445					err = 0;
446					break;
447				}
448		} while ((str = strtok(NULL, s)));
449	} else
450		return (EINVAL);
451	msg(2, "str_to_bm: done 0x%x\n", *bm);
452	return (err);
453}
454
455
456/*
457 * Generic routine for commands that apply to a particular instance of
458 * a driver under test (e.g. activate all errdefs defined on an instance).
459 */
460static int
461manage_instance(int fd, char *namep, int instance, int cmd)
462{
463	struct bofi_errctl errctl;
464
465	errctl.namesize = strlen(namep);
466	(void) strncpy(errctl.name, namep, MAXNAMELEN);
467	errctl.instance = instance;
468
469	msg(8, "manage_instance: %s %d\n", namep, instance);
470	if (ioctl(fd, cmd, &errctl) == -1) {
471		msg(0, "bofi ioctl %d failed: %s\n", cmd, strerror(errno));
472		return (-1);
473	}
474	return (0);
475}
476
477
478static int
479define_one_error(
480	FILE *fp,
481	struct bofi_errdef *edp,
482	struct acc_log_elem *item,
483	ulong_t	nttime,
484	ulong_t interval,
485	char	*type,
486	int fon,	/* corrupt after this many accesses */
487	size_t fcnt,	/* and then fail it fcnt times */
488	uint_t	acc_chk,
489	char	*opname,
490	uint64_t	operand)
491{
492	(void) fprintf(fp,
493	    "-n %s -i %d -r %d -l 0x%llx 0x%x -a %s -c %d %d -f %d"
494	    " -o %s 0x%llx",
495	    (char *)edp->name,
496	    edp->instance,
497	    edp->rnumber,
498	    edp->offset + item->offset,	/* offset into the regset */
499	    item->size,	/* corrupt addrs from offset to offset+size */
500	    type,
501	    fon,	/* corrupt after this many accesses */
502	    fcnt,	/* and then fail it fcnt times */
503	    acc_chk,
504	    opname,
505	    operand);
506
507	(void) fprintf(fp, " -w %lu %lu\n", nttime, interval);
508	return (0);
509}
510
511static void
512define_op_err(FILE *fp, int *ecnt, struct bofi_errdef *edp,
513	struct acc_log_elem *item, ulong_t nttime, ulong_t interval, char *type,
514	int fon, size_t fcnt)
515{
516	coding_t *ct;
517	char	*opname;
518	uint_t	op;
519	uint64_t	operand;
520	int k, save_size;
521	uint64_t save_offset;
522
523	if (item->access_type & BOFI_INTR)
524		ct = &ioptypes[0];
525	else
526		ct = &doptypes[0];
527
528	/*
529	 * errdefs for dma accesses are too numerous so assume that dma writes
530	 * (DDI_DMA_SYNC_FORDEV) create less exposure to potential errors than
531	 * do dma reads (DDI_DMA_SYNC_FORCPU).
532	 *
533	 * also by default do not corrupt PIO_W - it may hang a non-FT platform.
534	 */
535	if (item->access_type != BOFI_DMA_W &&
536	    ((item->access_type & BOFI_PIO_W) == 0 || !atype_is_default)) {
537		/*
538		 * user has asked for PIO_W
539		 */
540		for (; ct->str != 0; ct++) {
541			op = ct->code;
542			opname = ct->str;
543			switch (op) {
544			case BOFI_EQUAL:
545				operand = random_operand; /* a random value */
546				random_operand = lrand48() | ((uint64_t)
547				    (lrand48()) << 32);
548				break;
549			case BOFI_AND:
550				operand = 0xaddedabadb00bull;
551				break;
552			case BOFI_OR:
553				operand = 0x1;
554				break;
555			case BOFI_XOR:
556			default:
557				operand = myULLMAX;
558				break;
559			case BOFI_DELAY_INTR: /* delay for 1 msec */
560				operand = 1000000;
561				break;
562			case BOFI_LOSE_INTR: /* op not applicable */
563				operand = 0;
564				break;
565			case BOFI_EXTRA_INTR: /* extra intrs */
566				operand = 0xfff;
567				break;
568			}
569			*ecnt = *ecnt + 1;
570
571			if ((item->access_type == BOFI_DMA_W ||
572			    item->access_type == BOFI_DMA_R) &&
573			    item->size > sizeof (uint64_t) && item->size <
574			    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
575				save_size = item->size;
576				save_offset = item->offset;
577				for (k = (item->size +
578				    sizeof (uint64_t) - 1) &
579				    ~(sizeof (uint64_t) - 1);
580				    k > 0; k -= sizeof (uint64_t)) {
581					item->size = sizeof (uint64_t);
582					(void) define_one_error(fp, edp,
583					    item, nttime, interval, type, fon,
584					    fcnt, edp->acc_chk, opname,
585					    operand);
586					item->offset += sizeof (uint64_t);
587				}
588				item->size = save_size;
589				item->offset = save_offset;
590			} else {
591				(void) define_one_error(fp, edp, item,
592				    nttime, interval, type, fon, fcnt,
593				    edp->acc_chk, opname, operand);
594			}
595
596			if (op == BOFI_EQUAL) {
597				uint_t cnt;
598				for (cnt = 0; cnt < NPIO_DEFAULTS;
599				    cnt++, *ecnt = *ecnt + 1) {
600					if ((item->access_type == BOFI_DMA_W ||
601					    item->access_type == BOFI_DMA_R) &&
602					    item->size > sizeof (uint64_t) &&
603					    item->size <
604					    DMA_INDIVIDUAL_CORRUPT_CUTOFF) {
605						save_size = item->size;
606						save_offset = item->offset;
607						for (k = (item->size +
608						    sizeof (uint64_t) - 1) &
609						    ~(sizeof (uint64_t) - 1);
610						    k > 0;
611						    k -= sizeof (uint64_t)) {
612							item->size =
613							    sizeof (uint64_t);
614							(void) define_one_error(
615							    fp, edp, item,
616							    nttime, interval,
617							    type, fon, fcnt,
618							    edp->acc_chk,
619							    opname,
620							    pio_default_values
621							    [cnt]);
622							item->offset +=
623							    sizeof (uint64_t);
624						}
625						item->size = save_size;
626						item->offset = save_offset;
627					} else {
628						(void) define_one_error(fp,
629						    edp, item, nttime, interval,
630						    type, fon, fcnt,
631						    edp->acc_chk, opname,
632						    pio_default_values[cnt]);
633					}
634				}
635			}
636		}
637	}
638
639	if ((item->access_type & BOFI_PIO_W) && !atype_is_default) {
640		/*
641		 * user has asked for PIO_W
642		 */
643		(void) define_one_error(fp, edp, item, nttime, interval,
644		    type, fon, fcnt, edp->acc_chk, "NO", 0);
645		*ecnt = *ecnt + 1;
646	}
647
648	/*
649	 * and finally an access check errdef
650	 */
651	if (item->access_type & BOFI_PIO_RW)
652		(void) define_one_error(fp, edp, item, nttime, interval,
653		    type, fon, fcnt, 1, "OR", 0);
654
655	if (item->access_type & BOFI_DMA_RW)
656		(void) define_one_error(fp, edp, item, nttime, interval,
657		    type, fon, fcnt, 2, "OR", 0);
658
659}
660
661/*
662 * Convert a collection of log entries into error definitions.
663 */
664/* ARGSUSED */
665static int
666define_nerrs(int fd, FILE *fp, int *ecnt, struct bofi_errdef *edp,
667	struct acc_log_elem *items,
668	size_t nitems,
669	uint_t naccess,
670	uint_t minac,
671	uint_t maxac,
672	ulong_t	logtime,
673	ulong_t	logsize)
674{
675	char	*type;
676	uint_t	at;
677	int	i;
678	struct acc_log_elem	*item;
679	char	*opname;
680	uint_t	op;
681	uint64_t	operand;
682	int	cycleiops, cycledops;
683	int	intrs = 0;
684	ulong_t	ttime, nttime, interval;
685
686	op = edp->optype;
687	operand = edp->operand;
688	msg(3, "define_nerrs: nitems %d (ac %d at 0x%x): (%d %d)"
689	    " (op 0x%x 0x%llx)\n\n", nitems, naccess, items->access_type,
690	    minac, maxac, op, operand);
691
692	/*
693	 * all items are guaranteed have values in the two element set {0, at}
694	 * where at is a valid access type (so find the value of at)
695	 */
696	for (i = 0, item = items, at = 0; i < nitems; i++, item++)
697		if (item->access_type != 0) {
698			at = item->access_type;
699			break;
700		}
701	if (at == 0)
702		return (-1);
703
704	/*
705	 * find the string form of the access type
706	 */
707	for (i = 0, type = 0; atypes[i].str != 0; i++) {
708		if (atypes[i].code == at) {
709			type = atypes[i].str;
710			break;
711		}
712	}
713	if (type == 0) {
714		msg(0, "Unknown access type returned from bofi\n\t");
715		dump_log(0, 0, item, 1, BOFI_LOG_TIMESTAMP);
716		msg(1, "0x%x 0x%x 0x%x 0x%x\n", BOFI_LOG, BOFI_INTR,
717		    BOFI_DMA_RW, BOFI_PIO_RW);
718		return (-1);
719	}
720
721	msg(1, "define_n: at = 0x%d (%s)\n", at, type == 0 ? "null" : type);
722	/*
723	 * find the string form of the operator
724	 */
725	for (i = 0, opname = 0; optypes[i].str != 0; i++) {
726		if (op == optypes[i].code) {
727			opname = optypes[i].str;
728			break;
729		}
730	}
731
732	/*
733	 * if not found or inconsistent default to XOR
734	 */
735	if (opname == 0 ||
736	    (op == BOFI_NO_TRANSFER &&
737	    (at & (BOFI_DMA_RW|BOFI_PIO_R))) ||
738	    (op >= BOFI_DELAY_INTR && (at & BOFI_INTR) == 0)) {
739		opname = optypes[3].str;	/* "XOR" */
740		operand = myULLMAX;
741		op = optypes[3].code;
742	}
743
744	/*
745	 * if operator and access type are inconsistent choose a sensible
746	 * default
747	 */
748	cycleiops = 0;
749	if (at & BOFI_INTR)
750		if (op < BOFI_DELAY_INTR)
751			cycleiops = 1;
752		else if (op == BOFI_LOSE_INTR)
753			operand = 0;
754
755	cycledops = 0;
756	if (nitems == 1 && (at & BOFI_DMA_RW))
757		cycledops = 1;
758	/*
759	 * for each access in the list define one or more error definitions
760	 */
761	for (i = 0, item = items; i < nitems; i++, item++) {
762		size_t acnt, fcnt;
763		int j, fon;
764
765		if (item->access_type == 0)
766			continue;
767
768		/*
769		 * base number of errors to inject on 3% of number of
770		 * similar accesses seen during LOG phase
771		 */
772		acnt = item->repcount / 10 + 1; /* 10% */
773		fcnt = (acnt >= 3) ? acnt / 3 : 1; /* 3% */
774
775		/*
776		 * wait for twice the time it took during LOG phase
777		 */
778		if ((ttime = (item->access_time * 2)) < MIN_REPORT_TIME)
779			ttime = MIN_REPORT_TIME;
780		else if (max_edef_wait != 0 && ttime > max_edef_wait)
781			ttime = max_edef_wait;
782		/*
783		 * if edef_sleep set (-w) the use that, otherwise use default
784		 */
785		interval = edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP;
786
787		msg(10,
788		    "define_n: item %d limit %d step %d (intr %d) tt(%lu)\n",
789		    i, item->repcount, acnt, intrs, ttime);
790
791		for (j = 0, fon = 1, nttime = ttime; j < item->repcount;
792		    j += acnt) {
793			if (policy & OPERATORSPOLICY) {
794				define_op_err(fp, ecnt, edp, item,
795				    nttime, interval, type, fon, fcnt);
796			} else {
797				if (cycleiops) {
798					op = ioptypes[intrs].code;
799					opname = ioptypes[intrs++].str;
800					switch (op) {
801					case BOFI_DELAY_INTR:
802						/* delay for 1 sec */
803						operand = 1000000;
804						break;
805					case BOFI_LOSE_INTR:
806						/* op not applicable */
807						operand = 0;
808						break;
809					case BOFI_EXTRA_INTR:
810					default:
811						/* generate 2 extra intrs */
812						operand = 0xfff;
813						break;
814					}
815					intrs %= 3;
816				} else if (cycledops) {
817					op = doptypes[intrs].code;
818					opname = doptypes[intrs++].str;
819					switch (op) {
820					case BOFI_EQUAL:
821						random_operand = lrand48() |
822						    ((uint64_t)
823						    (lrand48()) << 32);
824						break; /* a random value */
825					case BOFI_AND:
826						operand = 0xaddedabadb00bull;
827						break;
828					case BOFI_OR:
829						operand = 0xd1ab011c0af1a5c0ull;
830						break;
831					case BOFI_XOR:
832					default:
833						operand = myULLMAX;
834						break;
835					}
836					intrs %= 4;
837				}
838				(void) define_one_error(fp, edp, item,
839				    nttime, interval, type, fon,
840				    fcnt, edp->acc_chk, opname, operand);
841				*ecnt = *ecnt + 1;
842				if (op == BOFI_EQUAL) {
843					uint_t cnt;
844					for (cnt = 0; cnt < NPIO_DEFAULTS;
845					    cnt++, *ecnt = *ecnt + 1)
846						(void) define_one_error(fp,
847						    edp, item, nttime,
848						    interval, type, fon, fcnt,
849						    edp->acc_chk, opname,
850						    pio_default_values[cnt]);
851				}
852			}
853
854			/*
855			 * all non maximal policies should only generate
856			 * a single error definition set per access.
857			 */
858			if (!(policy & MAXIMALPOLICY))
859				break;
860
861			nttime = (logtime - item->access_time) *
862			    (j + acnt + fcnt - 1) / logsize;
863			if (nttime < MIN_REPORT_TIME)
864				nttime = MIN_REPORT_TIME;
865			else if (nttime > max_edef_wait)
866				nttime = max_edef_wait;
867
868			msg(11, "define_nerrs: %lu %d %d %d %llu\n", nttime,
869			    max_edef_wait, fon, fcnt, item->access_time);
870
871			if (item->access_type != BOFI_INTR)
872				fon += j;
873		}
874	}
875
876	return (0);
877}
878
879static int
880reduce_log(uint16_t pol, struct acc_log *log,		/* input args */
881	struct acc_log_elem **llp, size_t *cntp)	/* output args */
882{
883	ulong_t logtime;
884	struct acc_log_elem *items, *item, *elem;
885	int cnt, nitems, acnt;
886	int i, j, k, lb, ub, mina, maxa, cutoff[2], mean;
887
888	if (llp == 0 || cntp == 0)	/* subroutine interface violated */
889		return (-1);
890
891	if (*llp == 0) {
892		items = (void *)log->logbase;
893		nitems = log->entries;
894	} else {
895		items = *llp;	/* outputs double up as inputs */
896		nitems = *cntp;
897	}
898	/* has the utc time wrapped over ULMAX - unlikely so fix it at 10 */
899	logtime = (log->stop_time >= log->start_time) ?
900	    log->stop_time - log->start_time : 10ul;
901
902	msg(1, "reduce %d: logtime %lu\n", nitems, logtime);
903	/*
904	 * Sort the log by access type - do not remove duplicates yet (but do
905	 * remove access that do not match the requested log -> errdef policy
906	 * (defined by union pu pol). Set the repcount field of each entry to a
907	 * unique value (in the control statement of the for loop) - this
908	 * ensures that the qsort (following the for loop) will not remove any
909	 * entries.
910	 */
911	for (i = 0, cnt = 0, elem = items; i < nitems;
912	    elem->repcount = i, i++, elem++) {
913		/*
914		 * If interested in the I/O transfer size and this access
915		 * does not match the requested size then ignore the access
916		 */
917		if ((pol & SIZEPOLICY) &&
918		    (!(pol & MULTIPOLICY) || elem->repcount == 1) &&
919		    /* req for DMA / ddi_rep */
920		    (pol & elem->size) == 0)
921			elem->access_type = 0;
922			/* these will end up sorted at the head */
923		else {
924			cnt += 1;
925			elem->size *= elem->repcount;
926			if (log->flags & BOFI_LOG_TIMESTAMP)
927				/* real access time */
928				elem->access_time -= log->start_time;
929			else
930				/* linear fit */
931				elem->access_time = logtime * (i + 1) / nitems;
932		}
933	}
934
935	qsort((void *)items, nitems, sizeof (*items), log_cmp);
936
937	msg(5, "qsorted log raw (nitems %d cnt %d:\n", nitems, cnt);
938	dump_log(14, 0, items, nitems, log->flags);
939
940	if (cnt != nitems) {	/* some items should be ignored */
941		items += (nitems - cnt);	/* ignore these ones */
942		if ((nitems = cnt) == 0) {
943			*cntp = 0;
944			*llp = 0;
945			return (0);
946			/* the chosen policy has ignored everything */
947		}
948
949	}
950	/*
951	 * Now remove duplicate entries based on access type, address and size.
952	 * Reuse the repcount field to store the no. of duplicate accesses.
953	 * Store the average access time in the single remaining
954	 * representative of the duplicate set.
955	 */
956
957	for (i = 1, cnt = 1, elem = items, elem->repcount = 1, item = elem + 1;
958	    i < nitems; i++, item++) {
959		if (elem_cmp(elem, item) == 0) {
960			elem->access_time += item->access_time;
961			elem->repcount++;
962		} else {	/* not a duplicate */
963			elem->access_time = logtime / elem->repcount;
964			elem++;
965			*elem = *item;
966			cnt++;
967			elem->repcount = 1;
968		}
969	}
970	elem->access_time = logtime / elem->repcount;
971
972	/*
973	 * The log is sorted by access type - now resort to order by frequency
974	 * of accesses (ie for a given access type uncommon access will come
975	 * first.
976	 */
977
978	qsort((void *)items, cnt, sizeof (*items), log_cmp2);
979	msg(4, "qsorted log2: cnt is %d\n", cnt);
980	dump_log(4, 0, items, cnt, log->flags);
981
982	for (i = 0; i < cnt; i = j) {
983
984		/*
985		 * Pick out the set [i, j) consisting of elements with the same
986		 * access type
987		 */
988		for (j = i + 1, acnt = items[i].repcount; j < cnt &&
989		    items[j].access_type == items[i].access_type; j++)
990			acnt += items[j].repcount;
991
992		if (j - i == 1)	/* never ignore solo accesses of a given type */
993			continue;
994		/*
995		 * Now determine what constitutes uncommon and common accesses:
996		 */
997		mina = items[i].repcount;
998		maxa = items[j-1].repcount;
999		mean = acnt / (j - i); /* mean value */
1000
1001		if (pol & (UNCOMMONPOLICY|MEDIANPOLICY)) {
1002			cutoff[0] = (mean - mina) / DISTRIB_CUTOFF + mina;
1003
1004			for (ub = i; ub < j; ub++)
1005				if (items[ub].repcount > cutoff[0])
1006					break;
1007			lb = j - 1;
1008		} else {
1009			lb = i;
1010			ub = j-1;
1011		}
1012
1013		if (pol & (COMMONPOLICY|MEDIANPOLICY)) {
1014			cutoff[1] = maxa - (maxa - mean) / DISTRIB_CUTOFF;
1015			for (lb = j - 1; lb >= i; lb--)
1016				if (items[lb].repcount < cutoff[1])
1017					break;
1018			if (!(pol & (UNCOMMONPOLICY|MEDIANPOLICY)))
1019				ub = i;
1020		}
1021
1022		msg(3, "reduce_log: p 0x%x at %d:0x%x %d:0x%x acnt mina maxa"
1023		    " (%d %d %d)"
1024		    " mean %d cutoffs(%d %d) bnds(%d, %d)\n",
1025		    pol, i, items[i].access_type, j, items[j].access_type,
1026		    acnt, mina, maxa, mean, cutoff[0], cutoff[1], lb, ub);
1027
1028		if (ub <= lb)
1029			if (!(pol & MEDIANPOLICY))
1030				/* delete all the mid accesses */
1031				for (k = ub; k <= lb; k++)
1032					items[k].access_type = 0;
1033			else {
1034				if (!(pol & UNCOMMONPOLICY))
1035					/* delete uncommon accesses */
1036					for (k = i; k < ub; k++)
1037						items[k].access_type = 0;
1038				if (!(pol & COMMONPOLICY))
1039					/* delete common accesses */
1040					for (k = lb+1; k < j; k++)
1041						items[k].access_type = 0;
1042			}
1043	}
1044	msg(4, "reduce_log: returning %d items\n", cnt);
1045	dump_log(5, 0, items, cnt, log->flags);
1046	*cntp = cnt;
1047	*llp = items;
1048	return (0);
1049}
1050
1051static void
1052log2errdefs(int fd, struct bofi_errdef *edp, struct acc_log *log,
1053	char *devpath)
1054{
1055	struct acc_log_elem	*items;
1056	size_t			nitems;
1057	int			i, j;
1058	uint_t			acc_cnt;
1059	char			fname[_POSIX_PATH_MAX];
1060	FILE			*fp = 0;
1061	time_t			utc = time(NULL);
1062	int			ecnt = 0;
1063	int			err;
1064	ulong_t			logtime;
1065	char			*buffer;
1066	struct stat		statbuf;
1067
1068	items = (void *)log->logbase;
1069	nitems = log->entries;
1070	logtime = (log->stop_time >= log->start_time) ?
1071	    log->stop_time - log->start_time : 10ul;
1072
1073	if (nitems == 0)
1074		return;
1075
1076	/* ensure that generated errdefs complete in bounded time */
1077	if (max_edef_wait == 0)
1078		max_edef_wait =
1079		    logtime > MIN_REPORT_TIME ? logtime : MIN_REPORT_TIME * 2;
1080
1081	msg(4, "log2errdefs(0x%p, 0x%p, %d, 0x%x):\n",
1082	    (void *) edp, (void *) items, nitems, policy);
1083
1084	(void) snprintf(fname, sizeof (fname), "%s.%d", (char *)edp->name,
1085	    (int)getpid());
1086	if ((fp = fopen(fname, "w")) == 0)
1087		fp = outfile;
1088
1089	(void) fprintf(fp, "#!/bin/ksh -p\n\n");
1090	(void) fprintf(fp, "# %-24s%s\n", "Script creation time:", ctime(&utc));
1091	(void) fprintf(fp, "# %-24s%llu\n",
1092	    "Activation time:", log->start_time);
1093	(void) fprintf(fp, "# %-24s%llu\n",
1094	    "Deactivation time:", log->stop_time);
1095	(void) fprintf(fp, "# %-24s%d\n", "Log size:", nitems);
1096	(void) fprintf(fp, "# %-24s", "Errdef policy:");
1097	for (i = 0; ptypes[i].str != 0; i++)
1098		if (policy & ptypes[i].code)
1099			(void) fprintf(fp, "%s ", ptypes[i].str);
1100	(void) fprintf(fp, "\n");
1101	(void) fprintf(fp, "# %-24s%s\n", "Driver:", (char *)edp->name);
1102	(void) fprintf(fp, "# %-24s%d\n", "Instance:", edp->instance);
1103	if (edp->access_type & BOFI_PIO_RW) {
1104		(void) fprintf(fp, "# %-24s%d\n",
1105		    "Register set:", edp->rnumber);
1106		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1107		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1108	} else if (edp->access_type & BOFI_DMA_RW) {
1109		(void) fprintf(fp, "# %-24s%d\n", "DMA handle:", edp->rnumber);
1110		(void) fprintf(fp, "# %-24s0x%llx\n", "Offset:", edp->offset);
1111		(void) fprintf(fp, "# %-24s0x%llx\n", "Length:", edp->len);
1112	} else if ((edp->access_type & BOFI_INTR) == 0) {
1113		(void) fprintf(fp, "# %-24s%d\n",
1114		    "Unknown Handle Type:", edp->rnumber);
1115	}
1116
1117	(void) fprintf(fp, "# %-24s0x%x ( ", "Access type:",
1118	    (edp->access_type & ~BOFI_LOG));
1119	if (edp->access_type & BOFI_PIO_R)
1120		(void) fprintf(fp, "%s ", "pio_r");
1121	if (edp->access_type & BOFI_PIO_W)
1122		(void) fprintf(fp, "%s ", "pio_w");
1123	if (edp->access_type & BOFI_DMA_W)
1124		(void) fprintf(fp, "%s ", "dma_w");
1125	if (edp->access_type & BOFI_DMA_R)
1126		(void) fprintf(fp, "%s ", "dma_r");
1127	if (edp->access_type & BOFI_INTR)
1128		(void) fprintf(fp, "%s ", "intr");
1129	(void) fprintf(fp, ")\n\n");
1130	if (user_comment)
1131		(void) fprintf(fp, "# %-24s%s\n\n",
1132		    "Test Comment:", user_comment);
1133
1134	dump_log(0, fp, items, nitems, log->flags);
1135
1136	items = 0;
1137	if ((err = reduce_log(policy, log, &items, &nitems)) < 0 ||
1138	    nitems == 0) {
1139		msg(4, "log2errdefs: reduce_log err %d nitems %d\n",
1140		    err, nitems);
1141		return;
1142	}
1143	(void) fprintf(fp, "\nerror() { echo \""
1144	    "${0##*/}: $@\""
1145	    " >&2; exit 2; }\n");
1146	(void) fprintf(fp,
1147	    "trap ' ' 16\t# ignore - it is trapped by abort monitor_edef\n");
1148
1149	(void) fprintf(fp, "\nfixup_script()\n{\n");
1150	if (scriptargs > 0) {
1151		(void) fprintf(fp, "\tif [[ $1 -eq 1 ]]\n\tthen\n");
1152		(void) fprintf(fp, "\t\t# Call a user defined workload\n");
1153		(void) fprintf(fp, "\t\t# while injecting errors\n\t\t");
1154		for (i = 0; i < scriptargs; i++)
1155			(void) fprintf(fp, "%s ", fixup_script[i]);
1156		(void) fprintf(fp, "\n\tfi\n");
1157		(void) fprintf(fp, "\treturn 0\n");
1158	} else {
1159		(void) fprintf(fp, "\tif [[ $1 -eq 0 ]]\n\tthen\n");
1160		(void) fprintf(fp,
1161		    "\t\t# terminate any outstanding workload\n");
1162		(void) fprintf(fp, "\t\tif [ $script_pid -gt 0 ]; then\n");
1163		(void) fprintf(fp, "\t\t\tkill $script_pid\n");
1164		(void) fprintf(fp, "\t\t\tscript_pid=0\n");
1165		(void) fprintf(fp, "\t\tfi\n");
1166		(void) fprintf(fp, "\tfi\n");
1167		(void) fprintf(fp, "\treturn -1\n");
1168	}
1169	(void) fprintf(fp, "}\n\n");
1170	(void) fprintf(fp, "devpath=/devices%s\n\n", devpath);
1171	(void) fprintf(fp, "#\n");
1172	(void) fprintf(fp, "# following text extracted from th_script\n");
1173	(void) fprintf(fp, "#\n");
1174	if (stat("/usr/lib/th_script", &statbuf) == -1) {
1175		msg(0, "log2errdefs: stat of /usr/lib/th_script failed\n");
1176		return;
1177	}
1178	fd = open("/usr/lib/th_script", O_RDONLY);
1179	if (fd == -1) {
1180		msg(0, "log2errdefs: open of /usr/lib/th_script failed\n");
1181		return;
1182	}
1183	buffer = malloc(statbuf.st_size);
1184	if (!buffer) {
1185		msg(0, "log2errdefs: malloc for /usr/lib/th_script failed\n");
1186		return;
1187	}
1188	if (read(fd, buffer, statbuf.st_size) != statbuf.st_size) {
1189		msg(0, "log2errdefs: read of /usr/lib/th_script failed\n");
1190		return;
1191	}
1192	(void) fwrite(buffer, statbuf.st_size, 1, fp);
1193	(void) close(fd);
1194	(void) fprintf(fp, "#\n");
1195	(void) fprintf(fp, "# end of extracted text\n");
1196	(void) fprintf(fp, "#\n");
1197	(void) fprintf(fp, "run_subtest %s %d <<ERRDEFS\n",
1198	    (char *)edp->name, edp->instance);
1199
1200	for (i = 0; i < nitems; i = j) {
1201
1202		acc_cnt = items[i].repcount;
1203		for (j = i + 1;
1204		    j < nitems && items[j].access_type == items[i].access_type;
1205		    j++)
1206			acc_cnt += items[j].repcount;
1207		msg(1, "l2e: nitems %d i %d j %d at 0x%x\n",
1208		    nitems, i, j, items[i].access_type);
1209		if (items[i].access_type != 0)
1210			(void) define_nerrs(fd, fp, &ecnt, edp, items+i, j-i,
1211			    acc_cnt, items[i].repcount, items[j-1].repcount,
1212			    logtime, log->entries);
1213	}
1214
1215	(void) fprintf(fp, "ERRDEFS\n");
1216	(void) fprintf(fp, "exit 0\n");
1217
1218	if (fp != stdout && fp != stderr) {
1219		if (fchmod(fileno(fp), S_IRWXU|S_IRGRP|S_IROTH))
1220			msg(0, "fchmod failed: %s\n", strerror(errno));
1221		if (fclose(fp) != 0)
1222			msg(0, "close of %s failed: %s\n", fname,
1223			    strerror(errno));
1224	}
1225	msg(10, "log2errdefs: done\n");
1226}
1227
/*
 * sizeof (longlong_t) - 1, i.e. a mask of the byte-offset bits below
 * longlong_t size.
 * NOTE(review): no use of this macro is visible in this part of the file.
 */
#define	LLSZMASK (sizeof (longlong_t) -1)
1229
1230static int
1231add_edef(int fd,
1232	struct bofi_errdef *errdef,	/* returned access criteria */
1233	struct bofi_errstate *errstate,
1234	struct handle_info *hdl,	/* handle to match against request */
1235	struct bofi_errdef *edp)	/* requested access criteria */
1236{
1237	*errdef = *edp;
1238	errdef->instance = hdl->instance;
1239
1240
1241	if (hdl->access_type == 0)
1242		return (EINVAL);
1243
1244	errdef->access_type =
1245	    errdef->access_type & (hdl->access_type|BOFI_LOG);
1246
1247	/* use a big log for PIO and a small one otherwise */
1248	if (lsize_is_default &&
1249	    (errdef->access_type & BOFI_PIO_RW) == 0) {
1250		errdef->access_count = DFLT_NONPIO_LOGSZ;
1251		errdef->fail_count = 0;
1252	}
1253	errdef->log.logsize = errstate->log.logsize =
1254	    errdef->access_count + errdef->fail_count - 1;
1255	if (errdef->log.logsize == -1U) {
1256		errdef->log.logsize = errstate->log.logsize = 0;
1257	}
1258	errdef->log.logbase = errstate->log.logbase =
1259	    (caddr_t)GETSTRUCT(struct acc_log_elem, errdef->log.logsize);
1260
1261	if (errdef->log.logbase == 0)
1262		return (EAGAIN);
1263
1264	errdef->rnumber = hdl->rnumber;
1265	errdef->offset = hdl->offset;
1266	errdef->len = hdl->len;
1267
1268	msg(4, "creating errdef: %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x 0x%x"
1269	    " 0x%x 0x%x 0x%llx\n",
1270	    errdef->namesize, (char *)errdef->name,
1271	    errdef->instance, errdef->rnumber,
1272	    errdef->offset, errdef->len,
1273	    errdef->access_type,
1274	    errdef->access_count, errdef->fail_count,
1275	    errdef->acc_chk, errdef->optype, errdef->operand);
1276	if (ioctl(fd, BOFI_ADD_DEF, errdef) == -1) {
1277		perror("th_define - adding errdef failed");
1278		return (errno);
1279	}
1280	errdef->optype = edp->optype; /* driver clears it if fcnt is zero */
1281	errstate->errdef_handle = errdef->errdef_handle;
1282	return (0);
1283}
1284
1285static void
1286collect_state(int fd, int cmd,
1287	struct bofi_errstate *errstate,
1288	struct bofi_errdef *errdef,
1289	char *devpath)
1290{
1291	int rval;
1292	size_t ls = errstate->log.logsize;
1293
1294	msg(2, "collect_state: pre: edp->access_type 0x%x (logsize %d)\n",
1295	    errdef->access_type, errdef->log.logsize);
1296
1297	do {
1298		errstate->log.logsize = 0; /* only copy the driver log once */
1299
1300		msg(10, "collecting state (lsize %d) ...\n",
1301		    errstate->log.logsize);
1302		errno = 0;
1303
1304		if (ioctl(fd, cmd, errstate) == -1 && errno != EINTR) {
1305			perror("th_define (collect) -"
1306			    " waiting for error report failed");
1307			break;
1308		}
1309
1310		(void) fprintf(outfile, "Logged %d out of %d accesses"
1311		    " (%s %d %d 0x%x %d).\n",
1312		    errstate->log.entries, ls,
1313		    (char *)errdef->name, errdef->instance, errdef->rnumber,
1314		    errdef->access_type, errstate->log.wrapcnt);
1315
1316		(void) msg(1, "\t(ac %d fc %d lf 0x%x wc %d).\n",
1317		    errstate->access_count, errstate->fail_count,
1318		    errstate->log.flags, errstate->log.wrapcnt);
1319
1320		rval = errno;
1321		if ((errstate->log.flags & BOFI_LOG_WRAP) &&
1322		    errstate->access_count > 0)
1323			continue;
1324		if (errstate->access_count <= 1 &&
1325		    errstate->fail_count == 0 &&
1326		    errstate->acc_chk == 0) {
1327			msg(3, "collecting state complete entries %d\n",
1328			    errstate->log.entries);
1329			break;
1330		}
1331
1332		msg(5, "still collecting state: %d, %d, %d\n",
1333		    errstate->access_count, errstate->fail_count,
1334		    errstate->acc_chk);
1335		(void) msg(2, "Log: errno %d size %d entries %d "
1336		    "(off 0x%llx len 0x%llx) ac %d\n", errno,
1337		    errstate->log.logsize, errstate->log.entries,
1338		    errdef->offset, errdef->len, errstate->access_count);
1339
1340	} while (rval == 0 && errstate->log.entries < ls);
1341
1342	/* now grab the log itself */
1343	errstate->log.logsize = ls;
1344	if (errstate->log.entries != 0) {
1345		if (ioctl(fd, BOFI_CHK_STATE, errstate) == -1) {
1346			msg(0,
1347			    "%s: errorwhile retrieving %d log entries: %s\n",
1348			    Progname, errstate->log.entries, strerror(errno));
1349		} else {
1350			msg(2, "collect_state: post: edp->access_type 0x%x"
1351			    " (log entries %d %d) (%llu - %llu)\n",
1352			    errdef->access_type,
1353			    errstate->log.entries, errstate->access_count,
1354			    errstate->log.start_time, errstate->log.stop_time);
1355
1356			log2errdefs(fd, errdef, &(errstate->log), devpath);
1357		}
1358	}
1359}
1360
1361static void
1362print_err_reports(FILE *fp, struct bofi_errstate *esp,
1363	char *fname, char *cmt, int id)
1364{
1365	if (fname != 0 && *fname != 0)
1366		(void) fprintf(fp, "%sErrdef file %s definition %d:",
1367		    cmt, fname, id);
1368	else
1369		(void) fprintf(fp, "%s", cmt);
1370
1371	if (esp->access_count != 0) {
1372		(void) fprintf(fp, " (access count %d).\n", esp->access_count);
1373	} else {
1374		(void) fprintf(fp, "\n%s\tremaining fail count %d acc_chk %d\n",
1375		    cmt, esp->fail_count, esp->acc_chk);
1376		(void) fprintf(fp, "%s\tfail time 0x%llx error reported time"
1377		    " 0x%llx errors reported %d\n", cmt,
1378		    esp->fail_time, esp->msg_time,
1379		    esp->errmsg_count);
1380		if (esp->msg_time)
1381			(void) fprintf(fp, "%s\tmessage \"%s\" severity 0x%x\n",
1382			    cmt, esp->buffer, (uint_t)esp->severity);
1383	}
1384}
1385
1386static void
1387thr_collect(void *arg, char *devpath)
1388{
1389	int fd;
1390	struct collector_def *hi = (struct collector_def *)arg;
1391
1392	msg(4, "thr_collect: collecting %s inst %d rn %d at = 0x%x.\n",
1393	    hi->ed.name, hi->ed.instance,
1394	    hi->ed.rnumber, hi->ed.access_type);
1395
1396	if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
1397		if (errno == EAGAIN)
1398			msg(0, "Too many instances of bofi currently open\n");
1399		else
1400			msg(0, "Error while opening bofi driver: %s",
1401			    strerror(errno));
1402	} else {
1403		/*
1404		 * Activate the logging errdefs - then collect the results.
1405		 */
1406		(void) manage_instance(fd, hi->ed.name,
1407		    hi->ed.instance, BOFI_START);
1408		collect_state(fd, BOFI_CHK_STATE_W, &hi->es, &hi->ed, devpath);
1409	}
1410
1411	/*
1412	 * there is no more work to do on this access handle so clean up / exit.
1413	 */
1414	msg(3, "thr_collect: closing and broadcasting.\n");
1415	exit(0);
1416}
1417
1418/*
1419 * Given an access handle known to the bofi driver see if the user has
1420 * specified access criteria that match that handle. Note: this matching
1421 * algorithm should be kept consistent with the drivers alogorithm.
1422 */
1423static int
1424match_hinfo(struct handle_info *hp, int instance, uint_t access_type,
1425	int rnumber, offset_t offset, offset_t len)
1426{
1427
1428	msg(9, "matching (%d %d) 0x%x %d offset (%llx, %llx) len (%llx %llx)\n",
1429	    hp->instance, instance, access_type, rnumber,
1430	    hp->offset, offset, hp->len, len);
1431
1432	if (instance != -1 && hp->instance != instance)
1433		return (0);
1434	if ((access_type & BOFI_DMA_RW) &&
1435	    (hp->access_type & BOFI_DMA_RW) &&
1436	    (rnumber == -1 || hp->rnumber == rnumber))
1437		return (1);
1438	else if ((access_type & BOFI_INTR) &&
1439	    (hp->access_type & BOFI_INTR))
1440		return (1);
1441	else if ((access_type & BOFI_PIO_RW) &&
1442	    (hp->access_type & BOFI_PIO_RW) &&
1443	    (rnumber == -1 || hp->rnumber == rnumber) &&
1444	    (len == 0 || hp->offset < offset + len) &&
1445	    (hp->len == 0 || hp->offset + hp->len > offset))
1446		return (1);
1447	else
1448		return (0);
1449}
1450
1451/*
1452 * Obtain all the handles created by the driver specified by the name parameter
1453 * that match the remaining arguments. The output parameter nhdls indicates how
1454 * many of the structures pointed to by the output parameter hip match the
1455 * specification.
1456 *
1457 * It is the responsibility of the caller to free *hip when *nhdls != 0.
1458 */
1459static int
1460get_hinfo(int fd, char *name, struct handle_info **hip, size_t *nhdls,
1461    int instance, int atype, int rset, offset_t offset, offset_t len,
1462    int new_semantics)
1463{
1464	struct bofi_get_hdl_info hdli;
1465	int command;
1466
1467	command = BOFI_GET_HANDLE_INFO;
1468	hdli.namesize = strlen(name);
1469	(void) strncpy(hdli.name, name, MAXNAMELEN);
1470	/*
1471	 * Initially ask for the number of access handles (not the structures)
1472	 * in order to allocate memory
1473	 */
1474	hdli.hdli = 0;
1475	*hip = 0;
1476	hdli.count = 0;
1477
1478	/*
1479	 * Ask the bofi driver for all handles created by the driver under test.
1480	 */
1481	if (ioctl(fd, command, &hdli) == -1) {
1482		*nhdls = 0;
1483		msg(0, "driver failed to return handles: %s\n",
1484		    strerror(errno));
1485		return (errno);
1486	} else if ((*nhdls = hdli.count) == 0) {
1487		msg(1, "get_hinfo: no registered handles\n");
1488		return (0);	/* no handles */
1489	} else if ((*hip = GETSTRUCT(struct handle_info, *nhdls)) == 0) {
1490		return (EAGAIN);
1491	} else {
1492		struct handle_info *hp, **chosen;
1493		int i;
1494
1495		/* Ask for *nhdls handles */
1496		hdli.hdli = (caddr_t)*hip;
1497		if (ioctl(fd, command, &hdli) == -1) {
1498			int err = errno;
1499
1500			msg(0, "BOFI_GET_HANDLE_INFO ioctl returned error %d\n",
1501			    err);
1502			free(*hip);
1503			return (err);
1504		}
1505
1506		if (hdli.count < *nhdls)
1507			*nhdls = hdli.count; /* some handles have gone away */
1508
1509		msg(4, "qsorting %d handles\n", *nhdls);
1510		if (*nhdls > 1)
1511			/* sort them naturally (NB ordering is not mandatory) */
1512			qsort((void *)*hip, *nhdls, sizeof (**hip), hdl_cmp);
1513
1514		if ((chosen = malloc(sizeof (hp) * *nhdls)) != NULL) {
1515			struct handle_info **ip;
1516			/* the selected handles */
1517			struct handle_info *prev = 0;
1518			int scnt = 0;
1519
1520			for (i = 0, hp = *hip, ip = chosen; i < *nhdls;
1521			    i++, hp++) {
1522				/*
1523				 * Remark: unbound handles never match
1524				 * (access_type == 0)
1525				 */
1526				if (match_hinfo(hp, instance, atype, rset,
1527				    offset&0x7fffffff, len&0x7fffffff)) {
1528					msg(3, "match: 0x%x 0x%llx 0x%llx"
1529					    " 0x%llx (0x%llx)\n",
1530					    hp->access_type, hp->addr_cookie,
1531					    hp->offset, hp->len,
1532					    (hp->len & 0x7fffffff));
1533					if (prev &&
1534					    (prev->access_type & BOFI_DMA_RW) &&
1535					    (hp->access_type & BOFI_DMA_RW) &&
1536					    hp->instance == prev->instance &&
1537					    hp->len == prev->len &&
1538					    hp->addr_cookie ==
1539					    prev->addr_cookie)
1540						continue;
1541
1542					if ((hp->access_type & BOFI_DMA_RW) &&
1543					    (atype & BOFI_DMA_RW) !=
1544					    hp->access_type)
1545						if (new_semantics)
1546							continue;
1547
1548					if (prev)
1549						msg(3, "match_hinfo: match:"
1550						    " 0x%llx (%d %d) (%d %d)"
1551						    " (0x%x 0x%x) (0x%llx,"
1552						    " 0x%llx)\n",
1553						    hp->addr_cookie,
1554						    prev->instance,
1555						    hp->instance, prev->rnumber,
1556						    hp->rnumber,
1557						    prev->access_type,
1558						    hp->access_type, prev->len,
1559						    hp->len);
1560
1561					/* it matches so remember it */
1562					prev = *ip++ = hp;
1563					scnt += 1;
1564				}
1565			}
1566
1567			if (*nhdls != scnt) {
1568				/*
1569				 * Reuse the alloc'ed memory to return
1570				 * only those handles the user has asked for.
1571				 * But first prune the handles to get rid of
1572				 * overlapping ranges (they are ordered by
1573				 * offset and length).
1574				 */
1575				*nhdls = scnt;
1576				for (i = 0, hp = *hip, ip = chosen; i < scnt;
1577				    i++, ip++, hp++)
1578					if (hp != *ip)
1579						(void) memcpy(hp, *ip,
1580						    sizeof (*hp));
1581			}
1582			free(chosen);
1583		}
1584
1585		for (i = 0, hp = *hip; i < *nhdls; i++, hp++) {
1586			msg(4, "\t%d 0x%x %d 0x%llx 0x%llx 0x%llx\n",
1587			    hp->instance, hp->access_type, hp->rnumber,
1588			    hp->len, hp->offset, hp->addr_cookie);
1589		}
1590	}
1591	if (*nhdls == 0 && *hip)
1592		free(*hip);
1593
1594	msg(4, "get_info: %s got %d handles\n", name, *nhdls);
1595	return (0);
1596}
1597
1598static void
1599init_sigs()
1600{
1601	struct sigaction sa;
1602	int *ip, sigs[] = {SIGINT, SIGTERM, 0};
1603
1604	sa.sa_handler = kill_sighandler;
1605	(void) sigemptyset(&sa.sa_mask);
1606	for (ip = sigs; *ip; ip++)
1607		(void) sigaddset(&sa.sa_mask, *ip);
1608	sa.sa_flags = 0;
1609	for (ip = sigs; *ip; ip++)
1610		(void) sigaction(*ip, &sa, NULL);
1611}
1612
/*
 * Raise the soft limits for open files, data segment size and file size
 * to their hard limits.  Failures are reported through msg() and are
 * otherwise ignored.  Whenever the open-file limit could be read,
 * enable_extended_FILE_stdio() is called as well.
 */
static void
up_resources()
{
	struct rlimit lim;

	/* Potentially hungry on resources so up them all to their maximums */
	if (getrlimit(RLIMIT_NOFILE, &lim) >= 0) {
		msg(12, "RLIMIT_NOFILE\t %lu (%lu)\n",
		    lim.rlim_cur, lim.rlim_max);
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_NOFILE, &lim) < 0)
			msg(0, "failed to set RLIMIT_NOFILE: %s\n",
			    strerror(errno));
		(void) enable_extended_FILE_stdio(-1, -1);
	} else {
		msg(0, "failed to obtain RLIMIT_NOFILE: %s\n", strerror(errno));
	}

	if (getrlimit(RLIMIT_DATA, &lim) >= 0) {
		msg(12, "RLIMIT_DATA\t %lu (%lu)\n", lim.rlim_cur, lim.rlim_max);
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_DATA, &lim) < 0)
			msg(0, "failed to set RLIMIT_DATA: %s\n",
			    strerror(errno));
	} else {
		msg(0, "failed to obtain RLIMIT_DATA: %s\n", strerror(errno));
	}

	if (getrlimit(RLIMIT_FSIZE, &lim) >= 0) {
		msg(12, "RLIMIT_FSIZE\t %lu (%lu)\n", lim.rlim_cur, lim.rlim_max);
		lim.rlim_cur = lim.rlim_max;
		if (setrlimit(RLIMIT_FSIZE, &lim) < 0)
			msg(0, "failed to set RLIMIT_FSIZE: %s\n",
			    strerror(errno));
	} else {
		msg(0, "failed to obtain RLIMIT_FSIZE: %s\n", strerror(errno));
	}
}
1649
1650static FILE *
1651create_test_file(char *drvname)
1652{
1653	char dirname[_POSIX_PATH_MAX];
1654	char testname[_POSIX_PATH_MAX];
1655	FILE *fp = 0;
1656	time_t utc = time(NULL);
1657
1658	if (snprintf(dirname, sizeof (dirname), "%s.test.%lu",
1659	    drvname, utc) == -1 ||
1660	    snprintf(testname, sizeof (testname), "%s.test.%lu",
1661	    drvname, utc) == -1)
1662		return (0);
1663
1664	if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IROTH)) {
1665		msg(0, "Error creating %s: %s\n", dirname, strerror(errno));
1666		return (0);
1667	}
1668	if (chdir(dirname)) {
1669		(void) rmdir(dirname);
1670		return (0);
1671	}
1672	if ((fp = fopen(testname, "w")) == 0)
1673		return (0);	/* leave created directory intact */
1674
1675	return (fp);
1676}
1677
/*
 * Argument block handed to walk_callback() through di_walk_node() by
 * getpath().
 */
struct walk_arg {
	char *path;		/* buffer receiving the devfs path of the */
				/* matching node; the callback sets it to */
				/* NULL if the path cannot be obtained */
	int instance;		/* driver instance number to look for */
	char name[MAXPATHLEN];	/* driver name to look for */
	int pathlen;		/* byte limit used when copying into path */
};
1684
1685static int
1686walk_callback(di_node_t node, void *arg)
1687{
1688	struct walk_arg *warg = (struct walk_arg *)arg;
1689	char *driver_name;
1690	char *path;
1691
1692	driver_name = di_driver_name(node);
1693	if (driver_name != NULL) {
1694		if (strcmp(driver_name, warg->name) == NULL &&
1695		    di_instance(node) == warg->instance) {
1696			path = di_devfs_path(node);
1697			if (path == NULL)
1698				warg->path = NULL;
1699			else
1700				(void) strncpy(warg->path, path, warg->pathlen);
1701			return (DI_WALK_TERMINATE);
1702		}
1703	}
1704	return (DI_WALK_CONTINUE);
1705}
1706
1707static int
1708getpath(char *path, int instance, char *name, int pathlen)
1709{
1710	di_node_t node;
1711	struct walk_arg warg;
1712
1713	warg.instance = instance;
1714	(void) strncpy(warg.name, name, MAXPATHLEN);
1715	warg.path = path;
1716	warg.pathlen = pathlen;
1717	if ((node = di_init("/", DINFOSUBTREE)) == DI_NODE_NIL)
1718		return (-1);
1719	if (di_walk_node(node, DI_WALK_CLDFIRST, &warg, walk_callback) == -1) {
1720		di_fini(node);
1721		return (-1);
1722	}
1723	if (warg.path == NULL) {
1724		di_fini(node);
1725		return (-1);
1726	}
1727	di_fini(node);
1728	return (0);
1729}
1730
1731/*
1732 * Record logsize h/w accesses of type 'edp->access_type' made by instance
1733 * 'edp->instance' of driver 'edp->name' to the register set (or dma handle)
1734 * 'edp->rnumber' that lie within the range 'edp->offset' to
1735 * 'edp->offset' + 'edp->len'.
1736 * Access criteria may be mixed and matched:
1737 * -	access types may be combined (PIO read/write, DMA read write or intrs);
1738 * -	if 'edp->instance' is -1 all instances are checked for the criteria;
1739 * -	if 'edp->rnumber' is -1 all register sets and dma handles are matched;
1740 * -	'offset' and 'len' indicate that only PIO and DMA accesses within the
1741 *	range 'edp->offset' to 'edp->len' will be logged. Putting 'edp->offset'
1742 *      to zero and 'edp->len' to -1ull gives maximal coverage.
1743 *
1744 * 'collecttime' is the number of seconds used to log accesses
1745 *		(default is infinity).
1746 */
1747static void
1748test_driver(struct bofi_errdef *edp,
1749	unsigned long long collecttime)
1750{
1751	pid_t pid;
1752	int statloc;
1753	struct collector_def *cdefs, *cdp;
1754	struct handle_info *hdls, *hdl;
1755	int i, fd;
1756	size_t cnt;
1757	size_t nchildren;
1758	unsigned long long timechunk;
1759	FILE *sfp;	/* generated control test file */
1760	char buf[MAXPATHLEN];
1761	char devpath[MAXPATHLEN];
1762	char *devpathp = "NULL";
1763	int drv_inst;
1764	int got_it = 0;
1765
1766	char *name = (char *)edp->name;
1767	uint_t logsize = edp->access_count + edp->fail_count - 1;
1768	int inst = edp->instance;
1769	uint_t atype = edp->access_type;
1770	int rset = edp->rnumber;
1771	offset_t offset = edp->offset;
1772	offset_t len = edp->len;
1773
1774	msg(4, "test_driver: %s %d inst %d 0x%x rset %d %llx %llx\n",
1775	    name, logsize, inst, atype, rset, offset, len);
1776
1777	drv_inst = inst;
1778	if (getpath(devpath, inst, name, MAXPATHLEN) != -1) {
1779		devpathp = devpath;
1780		got_it = 1;
1781	}
1782	if (logsize == -1U)
1783		logsize = 0;
1784	fd = open(BOFI_DEV, O_RDWR);
1785	if (fd == -1) {
1786		perror("get_hdl_info - bad open of bofi driver");
1787		return;
1788	}
1789	if (got_it) {
1790		(void) snprintf(buf, sizeof (buf),
1791		    "th_manage /devices%s offline", devpathp);
1792		(void) system(buf);
1793		(void) snprintf(buf, sizeof (buf),
1794		    "th_manage /devices%s online", devpathp);
1795		(void) system(buf);
1796		(void) snprintf(buf, sizeof (buf),
1797		    "th_manage /devices%s getstate >/dev/null", devpathp);
1798		(void) system(buf);
1799	}
1800	if (get_hinfo(fd, name, &hdls, &cnt,
1801	    inst, atype, rset, offset, len, 1) != 0) {
1802		msg(0, "driver_test: bad get_info for %d hdls\n", cnt);
1803		return;
1804	} else if (logsize == 0 || collecttime == 0 || cnt == 0) {
1805		if (cnt == 0)
1806			msg(1, "No matching handles.\n");
1807		return;
1808	}
1809	if ((cdefs = GETSTRUCT(struct collector_def, cnt)) == 0) {
1810		msg(0, "driver_test: can't get memory for %d cdefs\n", cnt);
1811		return;
1812	}
1813	up_resources();
1814	if (got_it) {
1815		if (scriptargs > 0) {
1816			(void) snprintf(buf, sizeof (buf),
1817			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1818			    " DRIVER_UNCONFIGURE=0 DRIVER_CONFIGURE=1",
1819			    devpathp, drv_inst);
1820			for (i = 0; i < scriptargs; i++) {
1821				(void) strcat(buf, " ");
1822				(void) strcat(buf, fixup_script[i]);
1823			}
1824			(void) strcat(buf, " &");
1825		} else {
1826			(void) snprintf(buf, sizeof (buf),
1827			    "while : ; do th_manage /devices%s online;"
1828			    " th_manage /devices%s getstate >/dev/null;"
1829			    " th_manage /devices%s offline;done &"
1830			    " echo $! >/tmp/bofi.pid",
1831			    devpathp, devpathp, devpathp);
1832		}
1833		(void) system(buf);
1834		(void) snprintf(buf, sizeof (buf), "sleep %d",
1835		    edef_sleep ? edef_sleep : DEFAULT_EDEF_SLEEP);
1836		(void) system(buf);
1837	}
1838
1839	(void) fprintf(outfile,
1840	    "Logging accesses to instances ");
1841	for (i = 0, inst = -1, hdl = hdls; i < cnt;
1842	    i++, hdl++) {
1843		if (inst != hdl->instance) {
1844			inst = hdl->instance;
1845			(void) fprintf(outfile, "%d ", inst);
1846		}
1847	}
1848	(void) fprintf(outfile, " (%d logs of size 0x%x).\n\t"
1849	    "(Use th_manage ... clear_errdefs to terminate"
1850	    " logging)\n", cnt, logsize);
1851
1852	sfp = create_test_file(name);
1853	/*
1854	 * Install a logging errdef for each matching handle,
1855	 * and then create a child to collect the log.
1856	 * The child is responsible for activating the log.
1857	 */
1858	for (i = 0, cdp = cdefs, hdl = hdls, nchildren = 0;
1859	    i < cnt; i++, cdp++, hdl++) {
1860		if (add_edef(fd, &cdp->ed, &cdp->es, hdl, edp) != 0) {
1861			cdp->lp = 0;
1862			cdp->pid = 0;
1863		} else {
1864			cdp->lp = (void *)cdp->ed.log.logbase;
1865			msg(1, "test_driver: thr_create:"
1866			    " lsize 0x%x 0x%x at 0x%x\n",
1867			    cdp->es.log.logsize,
1868			    cdp->ed.log.logsize,
1869			    cdp->ed.access_type);
1870			if ((pid = fork()) == -1) {
1871				msg(0, "fork failed for handle"
1872				    " %d: %s\n", i, strerror(errno));
1873				cdp->pid = 0;	/* ignore */
1874			} else if (pid == 0) {
1875				thr_collect(cdp, devpathp);
1876			} else {
1877				cdp->pid = pid;
1878				nchildren += 1;
1879			}
1880		}
1881	}
1882
1883	if (nchildren != 0) {
1884		if (sfp) {
1885			(void) fprintf(sfp, "#!/bin/ksh -p\n\n");
1886			(void) fprintf(sfp,
1887			    "\n# Test control script generated using:\n#");
1888			for (i = 0; i < pargc; i++)
1889				(void) fprintf(sfp, " %s", pargv[i]);
1890			(void) fprintf(sfp, "\n\n");
1891			(void) fprintf(sfp, "\nrun_tests()\n{\n");
1892			for (i = 0, cdp = cdefs; i < cnt; i++, cdp++)
1893				if (cdp->pid) {
1894					(void) fprintf(sfp,
1895					    "\tif [ -x ./%s.%d ]\n\tthen\n",
1896					    name, (int)cdp->pid);
1897					(void) fprintf(sfp,
1898					    "\t\techo \"Starting test"
1899					    " %d (id %d)\"\n",
1900					    i, (int)cdp->pid);
1901					(void) fprintf(sfp, "\t\t./%s.%d\n",
1902					    name, (int)cdp->pid);
1903					(void) fprintf(sfp, "\t\techo \""
1904					    "Test %d (id %d) complete\"\n",
1905					    i, (int)cdp->pid);
1906					(void) fprintf(sfp, "\tfi\n");
1907				}
1908			(void) fprintf(sfp, "}\n\nrun_tests\n");
1909			if (fchmod(fileno(sfp), S_IRWXU|S_IRGRP|S_IROTH))
1910				msg(0, "fchmod on control script failed: %s\n",
1911				    strerror(errno));
1912			if (fclose(sfp) != 0)
1913				msg(0, "Error closing control script: %s\n",
1914				    strerror(errno));
1915		}
1916
1917		set_handler(SIGALRM);	/* handle it */
1918		/*
1919		 * The user may want to terminate logging before the log fills
1920		 * so use a timer to signal the logging children to handle this
1921		 * case.
1922		 */
1923		timechunk = collecttime / MAXALRMCALL;
1924		collecttime = collecttime - timechunk * MAXALRMCALL;
1925
1926		msg(2, "logging for (0x%llx 0x%llx)\n", timechunk, collecttime);
1927
1928		(void) alarm(collecttime); /* odd bit of collect time */
1929
1930		/* wait for the log to fill or deadline satisfied */
1931		for (;;) {
1932			pid = wait(&statloc);
1933			for (i = 0, nchildren = 0, cdp = cdefs;
1934			    i < cnt; i++, cdp++)
1935				if (cdp->pid == pid)
1936					cdp->pid = 0;
1937			for (i = 0, nchildren = 0, cdp = cdefs;
1938			    i < cnt; i++, cdp++)
1939				if (cdp->pid)
1940					nchildren++;
1941			if (nchildren == 0)
1942				break;
1943			if (killed)
1944				break;
1945			if (alarmed) {
1946				if (timechunk-- > 0) {
1947					/*
1948					 * prepare for the next timeslice by
1949					 * rearming the clock
1950					 */
1951					if (alarm(MAXALRMCALL) == 0)
1952						alarmed = 0;
1953					else {
1954						/*
1955						 * must have been a user abort
1956						 * (via SIGALRM)
1957						 */
1958						(void) alarm(0);
1959						break;
1960					}
1961				} else
1962					break;
1963			}
1964		}
1965
1966		(void) fprintf(outfile, "Logging complete.\n");
1967	}
1968	if (got_it) {
1969		if (scriptargs > 0) {
1970			(void) snprintf(buf, sizeof (buf),
1971			    "DRIVER_PATH=/devices%s DRIVER_INSTANCE=%d"
1972			    " DRIVER_UNCONFIGURE=1 DRIVER_CONFIGURE=0",
1973			    devpathp, drv_inst);
1974			for (i = 0; i < scriptargs; i++) {
1975				(void) strcat(buf, " ");
1976				(void) strcat(buf, fixup_script[i]);
1977			}
1978			(void) system(buf);
1979		} else {
1980			(void) system("kill `cat /tmp/bofi.pid`");
1981		}
1982	}
1983	msg(2, "test_driver: terminating\n");
1984}
1985
1986static int
1987getnameinst(char *orig_path, int *instance, char *name, int namelen)
1988{
1989	di_node_t node;
1990	char *binding_name;
1991
1992	if ((node = di_init(&orig_path[8], DINFOSUBTREE|DINFOMINOR)) ==
1993	    DI_NODE_NIL)
1994		return (-1);
1995	if ((binding_name = di_driver_name(node)) == NULL)
1996		return (-1);
1997	*instance = di_instance(node);
1998	(void) strncpy(name, binding_name, namelen);
1999	di_fini(node);
2000	return (0);
2001}
2002
/*
 * Command-line synopsis: the option set for defining an errdef, the
 * option set for logging accesses (-a LOG), and -h.  Each line is indented
 * so that it lines up when the text follows a command name.
 * NOTE(review): the code that prints this text is outside this part of
 * the file.
 */
static char syntax[] =
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          [ -a acc_types ] [ -r rnumber ]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -o operator [ operand ] ] [ -f acc_chk  ]\n"
	"          [ -w max_wait_period [ report_interval ] ]\n"
	"     or\n"
	"          [ -n name [ -i instance ] | -P path ]\n"
	"          -a  LOG  [  acc_types ]  [ -r rnumber]\n"
	"          [ -l offset [ length ] ] [ -c count [ failcount ] ]\n"
	"          [ -s collect_time ] [ -p policy ] [ -x flags ]\n"
	"          [ -C ] [-e fixup_script ]\n"
	"     or\n"
	"          -h";
2017
2018int
2019main(int argc, char *argv[])
2020{
2021	extern char *optarg;
2022	extern int optind;
2023
2024	char	c;		/* for parsing getopts */
2025	int	nopts = 0;	/* for backward compatibility */
2026	int	err = 0;
2027
2028	/* use a maximal set of defaults for logging or injecting */
2029	struct bofi_errdef errdef = {
2030		0,		/* length of driver name */
2031		{0},		/* driver name */
2032		-1,		/* monitor all instances */
2033		-1,		/* monitor all register sets and DMA handles */
2034		(offset_t)0,	/* monitor from start of reg. set or DMA hd */
2035		myLLMAX,	/* monitor whole reg set or DMA hdl(no LLMAX) */
2036		0,		/* qualify all */
2037		DFLTLOGSZ,	/* default no. of accesses before corrupting */
2038		0u,		/* default no. of accesses to corrupt */
2039		0u,		/* no check access corruption */
2040		BOFI_NOP,	/* no corruption operator by default */
2041		myULLMAX,	/* default operand */
2042		{0, 0, BOFI_LOG_TIMESTAMP, /* timestamp by default */
2043		0, 0, 0, 0},	/* no logging by default */
2044		0};
2045
2046
2047	/* specify the default no of seconds for which to monitor */
2048	unsigned long long	collecttime = DFLTLOGTIME;
2049
2050	char	*str;	/* temporary variable */
2051	long	tmpl;	/* another one */
2052	int		i;
2053	uint_t	tmpui;
2054
2055	char buf[MAXPATHLEN];
2056
2057	Progname = (char *)strrchr(*argv, '/');
2058	Progname = (Progname == NULL) ? *argv : Progname + 1;
2059
2060	errfile = stderr;
2061	outfile = stdout;
2062	policy = 0;
2063	lsize_is_default = 1;
2064	pargv = argv;
2065	pargc = argc;
2066
2067	while ((c = getopt(argc, argv, "a:c:C:dD:e:f:h:i:l:n:o:p:P:r:s:tw:x"))
2068	    != EOF) {
2069		nopts++;
2070		switch (c) {
2071		case 'a':
2072			msg(2, "option a: optarg %s optind %d argc %d\n",
2073			    optarg, optind, argc);
2074			if ((err = str_to_bm(optarg, atypes,
2075			    &errdef.access_type)) == 0)
2076				while (optind < argc && *argv[optind] != '-') {
2077					if ((err = str_to_bm(argv[optind++],
2078					    atypes, &errdef.access_type)))
2079						break;
2080				}
2081			break;
2082		case 'c':
2083			lsize_is_default = 0;
2084			/* zero is valid */
2085			errdef.access_count = strtoul(optarg, &str, 0);
2086			if (str == optarg)
2087				err = EINVAL;
2088			else if (optind < argc && (argv[optind][0] != '-' ||
2089			    (strlen(argv[optind]) > 1 &&
2090			    isdigit(argv[optind][1]))))
2091				errdef.fail_count =
2092				    strtoull(argv[optind++], 0, 0);
2093			break;
2094		case 'C':
2095			user_comment = optarg;
2096			if (optind < argc && argv[optind][0] != '-')
2097				err = EINVAL;
2098			break;
2099		case 'D':
2100			dbglvl = strtoul(optarg, &str, 0);
2101			break;
2102		case 'e':
2103			fixup_script = 0;
2104			scriptargs = 0;
2105			fixup_script = &argv[optind - 1];
2106			scriptargs += 1;
2107			while (optind < argc) {
2108				optind += 1;
2109				scriptargs += 1;
2110			}
2111			break;
2112		case 'f':
2113			tmpl = strtol(optarg, &str, 0);
2114
2115			if (str != optarg)
2116				errdef.acc_chk = tmpl;
2117			else if (strcmp(optarg, "PIO") == NULL)
2118				errdef.acc_chk = 1;
2119			else if (strcmp(optarg, "DMA") == NULL)
2120				errdef.acc_chk = 2;
2121			else if (strcmp(optarg, "U4FT_ACC_NO_PIO") == NULL)
2122				errdef.acc_chk = 1;
2123			else if (strcmp(optarg, "U4FT_ACC_NO_DMA") == NULL)
2124				errdef.acc_chk = 2;
2125			else
2126				err = EINVAL;
2127			break;
2128		case 'i':
2129			if ((errdef.instance = strtol(optarg, &str, 0)) < 0)
2130				errdef.instance = -1;
2131			else if (str == optarg)
2132				err = EINVAL;
2133			break;
2134		case 'l':
2135			errdef.offset = strtoull(optarg, &str, 0);
2136			if (str == optarg)
2137				err = EINVAL;
2138			else if (optind < argc &&
2139			    (argv[optind][0] != '-' ||
2140			    (strlen(argv[optind]) > 1 &&
2141			    isdigit(argv[optind][1])))) {
2142				/* -1 indicates the rest of register set */
2143				errdef.len = strtoull(argv[optind++], 0, 0);
2144			}
2145			break;
2146		case 'n':
2147			(void) strncpy(errdef.name, optarg, MAXNAMELEN);
2148			if ((errdef.namesize = strlen(errdef.name)) == 0)
2149				err = EINVAL;
2150			break;
2151		case 'o':
2152			for (i = 0; optypes[i].str != 0; i++)
2153				if (strcmp(optarg, optypes[i].str) == 0) {
2154					errdef.optype = optypes[i].code;
2155					break;
2156				}
2157			if (optypes[i].str == 0)
2158				err = EINVAL;
2159			else if (optind < argc &&
2160			    (argv[optind][0] != '-' ||
2161			    (strlen(argv[optind]) > 1 &&
2162			    isdigit(argv[optind][1]))))
2163				errdef.operand =
2164				    strtoull(argv[optind++], 0, 0);
2165			break;
2166		case 'p':
2167			tmpui = 0x0u;
2168			if ((err = str_to_bm(optarg, ptypes, &tmpui)) == 0) {
2169				while (optind < argc && *argv[optind] != '-')
2170					if ((err = str_to_bm(argv[optind++],
2171					    ptypes, &tmpui)))
2172						break;
2173				policy = (uint16_t)tmpui;
2174			}
2175			if (err == 0 && (policy & BYTEPOLICY))
2176				errdef.log.flags |= BOFI_LOG_REPIO;
2177			break;
2178		case 'P':
2179			if (getnameinst(optarg, &errdef.instance, buf,
2180			    MAXPATHLEN) == -1)
2181				err = EINVAL;
2182			else
2183				(void) strncpy(errdef.name, buf, MAXNAMELEN);
2184			break;
2185		case 'r':
2186			if ((errdef.rnumber = strtol(optarg, &str, 0)) < 0)
2187				errdef.rnumber = -1;
2188			if (str == optarg) err = EINVAL;
2189			break;
2190		case 's':
2191			collecttime = strtoull(optarg, &str, 0);
2192			if (str == optarg)
2193				err = EINVAL;	/* zero is valid */
2194			break;
2195		case 'w':
2196			do_status = 1;
2197			max_edef_wait = strtoul(optarg, &str, 0);
2198			/* zero is valid */
2199			if (str == optarg)
2200				err = EINVAL;
2201			else if (optind < argc &&
2202			    (argv[optind][0] != '-' ||
2203			    (strlen(argv[optind]) > 1 &&
2204			    isdigit(argv[optind][1]))))
2205				edef_sleep = strtoull(argv[optind++], 0, 0);
2206
2207			break;
2208		case 'x':
2209			if ((optind < argc && *argv[optind] == '-') ||
2210			    optind == argc)
2211				errdef.log.flags |= BOFI_LOG_WRAP;
2212			else {
2213				if (strchr(argv[optind], 'w') != 0)
2214					errdef.log.flags |= BOFI_LOG_WRAP;
2215				if (strchr(argv[optind], 'r') != 0)
2216					errdef.log.flags |= BOFI_LOG_REPIO;
2217				if (strchr(argv[optind], 't') != 0)
2218					errdef.log.flags |= BOFI_LOG_TIMESTAMP;
2219				if (strstr(argv[optind], "~t") != 0)
2220					errdef.log.flags &= ~BOFI_LOG_TIMESTAMP;
2221				optind++;
2222			}
2223			break;
2224		case 'h':
2225			(void) fprintf(errfile, "usage: %s %s\n",
2226			    Progname, syntax);
2227			exit(0);
2228			break;
2229		case '?':	/* also picks up missing parameters */
2230		default:
2231			(void) fprintf(errfile, "usage: %s %s\n",
2232			    Progname, syntax);
2233			exit(2);
2234		}
2235
2236		if (err) {
2237			(void) fprintf(errfile, "usage: %s %s\n",
2238			    Progname, syntax);
2239			exit(2);
2240		}
2241		if (c == 'e')
2242			break;	/* the -e option must be the final option */
2243	}
2244
2245
2246	if (errdef.name[0] == 0) {
2247		msg(0, "%s - invalid name parameter\n", Progname);
2248		exit(1);
2249	}
2250	errdef.namesize = strlen(errdef.name);
2251
2252	if (policy == 0) {
2253		policy |= UNBIASEDPOLICY;
2254		policy |= OPERATORSPOLICY;
2255	}
2256
2257	if (errdef.optype == BOFI_NOP)
2258		errdef.optype = BOFI_XOR;
2259	if (errdef.access_type == BOFI_LOG) { /* qualify all accesses */
2260		errdef.access_type =
2261		    (BOFI_LOG|BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2262		atype_is_default = 1;
2263	} else if (errdef.access_type == 0) { /* qualify all accesses */
2264		errdef.access_type =
2265		    (BOFI_DMA_RW|BOFI_PIO_RW|BOFI_INTR);
2266		atype_is_default = 1;
2267	} else
2268		atype_is_default = 0;
2269
2270	init_sigs();
2271	if ((errdef.access_type & BOFI_LOG) == 0) {
2272		int fd, i, instance;
2273		size_t cnt;
2274		struct handle_info *hdls, *hp;
2275
2276		if ((fd = open(BOFI_DEV, O_RDWR)) == -1) {
2277			msg(0, "%s: error opening bofi driver: %s\n",
2278			    Progname, strerror(errno));
2279			exit(1);
2280		}
2281		if ((err = get_hinfo(fd, errdef.name, &hdls, &cnt,
2282		    errdef.instance, errdef.access_type, errdef.rnumber,
2283		    errdef.offset, errdef.len, 0)) != 0) {
2284			msg(0, "%s: Bad lookup on bofi driver.\n", Progname);
2285			(void) close(fd);
2286			exit(1);
2287		} else if (cnt == 0) {
2288			msg(0,
2289			    "%s: No handles match request access criteria.\n",
2290			    Progname);
2291			(void) close(fd);
2292			exit(1);
2293		}
2294		if (errdef.instance == -1)
2295			instance = -1;
2296		else {
2297			instance = hdls->instance;
2298			for (i = 0, hp = hdls; i < cnt; i++, hp++) {
2299				if (instance != hp->instance) {
2300					instance = -1;
2301					break;
2302				}
2303			}
2304		}
2305		if (instance == -1) {
2306			msg(0, "Multiple instances match access criteria"
2307			    " (only allowed when logging):\n");
2308			msg(0, "\tinst\taccess\trnumber\toffset\tlength\n");
2309			for (i = 0, hp = hdls; i < cnt; i++, hp++)
2310				msg(0, "\t%d\t0x%x\t%d\t0x%llx\t0x%llx\n",
2311				    hp->instance, hp->access_type,
2312				    hp->rnumber, hp->offset, hp->len);
2313		} else {
2314			struct bofi_errstate es;
2315			int timeleft = max_edef_wait;
2316
2317			if (ioctl(fd, BOFI_ADD_DEF, &errdef) == -1) {
2318				perror("th_define - adding errdef failed");
2319			} else {
2320				es.errdef_handle = errdef.errdef_handle;
2321				msg(4, "waiting for edef:"
2322				    " %d %s %d %d 0x%llx 0x%llx 0x%x 0x%x"
2323				    " 0x%x 0x%x 0x%x 0x%llx\n",
2324				    errdef.namesize, errdef.name,
2325				    errdef.instance, errdef.rnumber,
2326				    errdef.offset, errdef.len,
2327				    errdef.access_type, errdef.access_count,
2328				    errdef.fail_count, errdef.acc_chk,
2329				    errdef.optype, errdef.operand);
2330
2331				set_handler(SIGALRM);	/* handle it */
2332
2333				do {
2334					if (do_status)
2335						(void) alarm(edef_sleep);
2336					if (ioctl(fd, BOFI_CHK_STATE_W,
2337					    &es) == -1) {
2338						if (errno != EINTR) {
2339							perror("bad"
2340							    " BOFI_CHK_STATE");
2341							break;
2342						} else if (!do_status) {
2343							break;
2344						}
2345					}
2346					if (do_status)
2347						(void) fprintf(outfile,
2348						    "%llu:%llu:%u:%u:%u:"
2349						    "%u:%d:\"%s\"\n",
2350						    es.fail_time, es.msg_time,
2351						    es.access_count,
2352						    es.fail_count,
2353						    es.acc_chk, es.errmsg_count,
2354						    (uint_t)es.severity,
2355						    (es.msg_time) ?
2356						    es.buffer : "");
2357					if (es.acc_chk == 0 &&
2358					    es.fail_count == 0 && !do_status)
2359						print_err_reports(outfile,
2360						    &es, "", "", -1);
2361					else if (alarmed) {
2362						alarmed = 0;
2363						if ((timeleft -= edef_sleep) <=
2364						    0) {
2365							if (do_status)
2366								break;
2367							print_err_reports(
2368							    outfile, &es, "",
2369							    "", -1);
2370							break;
2371						}
2372					} else if (!do_status)
2373						print_err_reports(outfile,
2374						    &es, "", "", -1);
2375				} while (es.acc_chk != 0 || es.fail_count != 0);
2376
2377				msg(2, "done: acc_chk 0x%x fcnt %d\n",
2378				    es.acc_chk, es.fail_count);
2379			}
2380
2381			(void) close(fd);
2382		}
2383		free(hdls);
2384		return (0);
2385	}
2386	test_driver(&errdef, collecttime);
2387	return (0);
2388}
2389