1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27
28#include <stdio.h>
29#include <stdio_ext.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include <fcntl.h>
33#include <ctype.h>
34#include <string.h>
35#include <memory.h>
36#include <signal.h>
37#include <wait.h>
38#include <limits.h>
39#include <errno.h>
40#include <sys/types.h>
41#include <sys/time.h>
42#include <sys/times.h>
43#include <sys/fstyp.h>
44#include <sys/fsid.h>
45#include <sys/stat.h>
46#include <sys/mman.h>
47#include <sys/resource.h>
48#include <libproc.h>
49#include <priv.h>
50#include "ramdata.h"
51#include "proto.h"
52#include "htbl.h"
53
/*
 * The user can trace individual threads by using the 'pid/1,3-6,8-' syntax.
 * This structure keeps track of one such pid/lwp specification.  If no LWPs
 * were specified for the process, 'lwps' is NULL.
 */
typedef struct proc_set {
	pid_t		pid;	/* process named on the command line */
	const char 	*lwps;	/* LWP list text (e.g. "1,3-6,8-"), or NULL */
} proc_set_t;
63
/*
 * Function prototypes for routines local to this file.
 * NOTE(review): the original comment calls these "static routines", but
 * none of the declarations below carries the 'static' storage class.
 * NOTE(review): child_to_file(), file_to_parent() and per_proc_init() are
 * declared with empty parameter lists, so the compiler does not check the
 * arguments passed to them.
 */
void	setup_basetime(hrtime_t, struct timeval *);
int	xcreat(char *);
void	setoutput(int);
void	report(private_t *, time_t);
void	prtim(timestruc_t *);
void	pids(char *, proc_set_t *);
void	psargs(private_t *);
int	control(private_t *, pid_t);
int	grabit(private_t *, proc_set_t *);
void	release(private_t *, pid_t);
void	intr(int);
int	wait4all(void);
void	letgo(private_t *);
void	child_to_file();
void	file_to_parent();
void	per_proc_init();
int	lib_sort(const void *, const void *);
int	key_sort(const void *, const void *);

/* thread entry points: one worker per traced lwp, plus the coordinator */
void	*worker_thread(void *);
void	main_thread(int);
88
/*
 * Test for empty set.
 * is_empty() should not be called directly; use isemptyset(sp), which
 * views the set pointed to by 'sp' as an array of uint32_t words and
 * passes the word count derived from sizeof (*(sp)).
 */
int	is_empty(const uint32_t *, size_t);
#define	isemptyset(sp) \
	is_empty((uint32_t *)(sp), sizeof (*(sp)) / sizeof (uint32_t))
96
/*
 * OR the second set into the first set.
 * or_set() should not be called directly; use prorset(sp1, sp2).
 * The word count is computed from the size of *sp1 only, so both
 * arguments are expected to point to sets of the same size.
 */
void	or_set(uint32_t *, const uint32_t *, size_t);
#define	prorset(sp1, sp2) \
	or_set((uint32_t *)(sp1), (uint32_t *)(sp2), \
	sizeof (*(sp1)) / sizeof (uint32_t))
105
106/* fetch or allocate thread-private data */
107private_t *
108get_private()
109{
110	void *value;
111	private_t *pri = NULL;
112
113	if (thr_getspecific(private_key, &value) == 0)
114		pri = value;
115	if (pri == NULL) {
116		pri = my_malloc(sizeof (*pri), NULL);
117		(void) memset(pri, 0, sizeof (*pri));
118		pri->sys_path = my_malloc(pri->sys_psize = 16, NULL);
119		pri->sys_string = my_malloc(pri->sys_ssize = 32, NULL);
120		if (thr_setspecific(private_key, pri) == ENOMEM)
121			abend("memory allocation failure", NULL);
122	}
123	return (pri);
124}
125
126/* destructor function for thread-private data */
127void
128free_private(void *value)
129{
130	private_t *pri = value;
131
132	if (pri->sys_path)
133		free(pri->sys_path);
134	if (pri->sys_string)
135		free(pri->sys_string);
136	if (pri->exec_string)
137		free(pri->exec_string);
138	if (pri->str_buffer)
139		free(pri->str_buffer);
140	free(pri);
141}
142
143/*
144 * This is called by the main thread (via create_thread())
145 * and is also called from other threads in worker_thread()
146 * while holding truss_lock.  No further locking is required.
147 */
148void
149insert_lwpid(lwpid_t lwpid)
150{
151	int i;
152
153	truss_nlwp++;
154	for (i = 0; i < truss_maxlwp; i++) {
155		if (truss_lwpid[i] == 0)
156			break;
157	}
158	if (i == truss_maxlwp) {
159		/* double the size of the array */
160		truss_lwpid = my_realloc(truss_lwpid,
161		    truss_maxlwp * 2 * sizeof (lwpid_t), NULL);
162		(void) memset(&truss_lwpid[truss_maxlwp], 0,
163		    truss_maxlwp * sizeof (lwpid_t));
164		truss_maxlwp *= 2;
165	}
166	truss_lwpid[i] = lwpid;
167}
168
169/*
170 * This is called from the first worker thread to encounter one of
171 * (leave_hung || interrupt || sigusr1).  It must notify all other
172 * worker threads of the same condition.  truss_lock is held.
173 */
174void
175broadcast_signals(void)
176{
177	static int int_notified = FALSE;
178	static int usr1_notified = FALSE;
179	static int usr2_notified = FALSE;
180	lwpid_t my_id = thr_self();
181	lwpid_t lwpid;
182	int i;
183
184	if (interrupt && !int_notified) {
185		int_notified = TRUE;
186		for (i = 0; i < truss_maxlwp; i++) {
187			if ((lwpid = truss_lwpid[i]) != 0 && lwpid != my_id)
188				(void) thr_kill(lwpid, interrupt);
189		}
190	}
191	if (sigusr1 && !usr1_notified) {
192		usr1_notified = TRUE;
193		for (i = 0; i < truss_maxlwp; i++) {
194			if ((lwpid = truss_lwpid[i]) != 0 && lwpid != my_id)
195				(void) thr_kill(lwpid, SIGUSR1);
196		}
197	}
198	if (leave_hung && !usr2_notified) {
199		usr2_notified = TRUE;
200		for (i = 0; i < truss_maxlwp; i++) {
201			if ((lwpid = truss_lwpid[i]) != 0 && lwpid != my_id)
202				(void) thr_kill(lwpid, SIGUSR2);
203		}
204	}
205}
206
207static struct ps_lwphandle *
208grab_lwp(lwpid_t who)
209{
210	struct ps_lwphandle *Lwp;
211	int gcode;
212
213	if ((Lwp = Lgrab(Proc, who, &gcode)) == NULL) {
214		if (gcode != G_NOPROC) {
215			(void) fprintf(stderr,
216			    "%s: cannot grab LWP %u in process %d,"
217			    " reason: %s\n",
218			    command, who, (int)Pstatus(Proc)->pr_pid,
219			    Lgrab_error(gcode));
220			interrupt = SIGTERM;	/* post an interrupt */
221		}
222	}
223	return (Lwp);
224}
225
226/*
227 * Iteration function called for each initial lwp in the controlled process.
228 */
229/* ARGSUSED */
230int
231create_thread(void *arg, const lwpstatus_t *Lsp)
232{
233	struct ps_lwphandle *new_Lwp;
234	lwpid_t lwpid;
235	int *count = arg;
236
237	if (lwptrace(Pstatus(Proc)->pr_pid, Lsp->pr_lwpid))
238		*count += 1;
239
240	if ((new_Lwp = grab_lwp(Lsp->pr_lwpid)) != NULL) {
241		if (thr_create(NULL, 0, worker_thread, new_Lwp,
242		    THR_BOUND | THR_SUSPENDED, &lwpid) != 0)
243			abend("cannot create lwp to follow child lwp", NULL);
244		insert_lwpid(lwpid);
245	}
246	return (0);
247}
248
/*
 * Program entry point.
 *
 * In order, this function:
 *   - makes sure descriptors 0, 1 and 2 are open and initializes the
 *     locks, the thread-specific-data key and the default trace sets;
 *   - parses the command line options (see OPTIONS below);
 *   - either creates the process to be traced with Pcreate() (a command
 *     was given) or grabs the processes named with -p via grabit(),
 *     forking one additional truss process per extra pid;
 *   - raises the file descriptor limit, sets up output and signal
 *     dispositions, and allocates the global_psinfo area;
 *   - installs the system call, signal and fault tracing sets on the
 *     controlled process and calls main_thread().
 *
 * Exits with status 2 on a bad option argument, a usage error, or a
 * failure to create/grab the target.  main_thread() finishes by calling
 * exit(), so the trailing return is not expected to be reached.
 */
int
main(int argc, char *argv[])
{
	private_t *pri;
	struct tms tms;
	struct rlimit rlim;
	int ofd = -1;
	int opt;
	int i;
	int first;
	int errflg = FALSE;
	int badname = FALSE;
	proc_set_t *grab = NULL;
	const pstatus_t *Psp;
	const lwpstatus_t *Lsp;
	int sharedmem;

	/* a few of these need to be initialized to NULL */
	Cp = NULL;
	fcall_tbl = NULL;

	/*
	 * Make sure fd's 0, 1, and 2 are allocated,
	 * just in case truss was invoked from init.
	 * The loop stops as soon as open() fails or returns a descriptor
	 * >= 2; a descriptor above 2 was not needed and is closed again.
	 */
	while ((i = open("/dev/null", O_RDWR)) >= 0 && i < 2)
		;
	if (i > 2)
		(void) close(i);

	starttime = times(&tms);	/* for elapsed timing */

	/* this should be per-traced-process */
	pagesize = sysconf(_SC_PAGESIZE);

	/* command name (e.g., "truss") */
	if ((command = strrchr(argv[0], '/')) != NULL)
		command++;
	else
		command = argv[0];

	/* set up the initial private data */
	(void) mutex_init(&truss_lock, USYNC_THREAD, NULL);
	(void) mutex_init(&count_lock, USYNC_THREAD, NULL);
	(void) cond_init(&truss_cv, USYNC_THREAD, NULL);
	if (thr_keycreate(&private_key, free_private) == ENOMEM)
		abend("memory allocation failure", NULL);
	pri = get_private();

	Euid = geteuid();
	Egid = getegid();
	Ruid = getuid();
	Rgid = getgid();
	ancestor = getpid();

	prfillset(&trace);	/* default: trace all system calls */
	premptyset(&verbose);	/* default: no syscall verbosity */
	premptyset(&rawout);	/* default: no raw syscall interpretation */

	prfillset(&signals);	/* default: trace all signals */

	prfillset(&faults);	/* default: trace all faults */
	prdelset(&faults, FLTPAGE);	/* except this one */

	premptyset(&readfd);	/* default: dump no buffers */
	premptyset(&writefd);

	premptyset(&syshang);	/* default: hang on no system calls */
	premptyset(&sighang);	/* default: hang on no signals */
	premptyset(&flthang);	/* default: hang on no faults */

	(void) sigemptyset(&emptyset);	/* for unblocking all signals */
	(void) sigfillset(&fillset);	/* for blocking all signals */

	/*
	 * Option parsing.  A list-parsing helper that returns nonzero only
	 * sets 'badname'; parsing continues so every bad argument is seen
	 * before the exit(2) below.
	 */
#define	OPTIONS	"FpfcaeildDEht:T:v:x:s:S:m:M:u:U:r:w:o:"
	while ((opt = getopt(argc, argv, OPTIONS)) != EOF) {
		switch (opt) {
		case 'F':		/* force grabbing (no O_EXCL) */
			Fflag = PGRAB_FORCE;
			break;
		case 'p':		/* grab processes */
			pflag = TRUE;
			break;
		case 'f':		/* follow children */
			fflag = TRUE;
			break;
		case 'c':		/* don't trace, just count */
			cflag = TRUE;
			iflag = TRUE;	/* implies no interruptable syscalls */
			break;
		case 'a':		/* display argument lists */
			aflag = TRUE;
			break;
		case 'e':		/* display environments */
			eflag = TRUE;
			break;
		case 'i':		/* don't show interruptable syscalls */
			iflag = TRUE;
			break;
		case 'l':		/* show lwp id for each syscall */
			lflag = TRUE;
			break;
		case 'h':		/* debugging: report hash stats */
			hflag = TRUE;
			break;
		case 'd':		/* show time stamps */
			dflag = TRUE;
			break;
		case 'D':		/* show time deltas */
			Dflag = TRUE;
			break;
		case 'E':
			Eflag = TRUE;	/* show syscall times */
			break;
		case 't':		/* system calls to trace */
			if (syslist(optarg, &trace, &tflag))
				badname = TRUE;
			break;
		case 'T':		/* system calls to hang process */
			if (syslist(optarg, &syshang, &Tflag))
				badname = TRUE;
			break;
		case 'v':		/* verbose interpretation of syscalls */
			if (syslist(optarg, &verbose, &vflag))
				badname = TRUE;
			break;
		case 'x':		/* raw interpretation of syscalls */
			if (syslist(optarg, &rawout, &xflag))
				badname = TRUE;
			break;
		case 's':		/* signals to trace */
			if (siglist(pri, optarg, &signals, &sflag))
				badname = TRUE;
			break;
		case 'S':		/* signals to hang process */
			if (siglist(pri, optarg, &sighang, &Sflag))
				badname = TRUE;
			break;
		case 'm':		/* machine faults to trace */
			if (fltlist(optarg, &faults, &mflag))
				badname = TRUE;
			break;
		case 'M':		/* machine faults to hang process */
			if (fltlist(optarg, &flthang, &Mflag))
				badname = TRUE;
			break;
		case 'u':		/* user library functions to trace */
			if (liblist(optarg, 0))
				badname = TRUE;
			break;
		case 'U':		/* user library functions to hang */
			if (liblist(optarg, 1))
				badname = TRUE;
			break;
		case 'r':		/* show contents of read(fd) */
			if (fdlist(optarg, &readfd))
				badname = TRUE;
			break;
		case 'w':		/* show contents of write(fd) */
			if (fdlist(optarg, &writefd))
				badname = TRUE;
			break;
		case 'o':		/* output file for trace */
			oflag = TRUE;
			/* a repeated -o replaces the earlier output file */
			if (ofd >= 0)
				(void) close(ofd);
			if ((ofd = xcreat(optarg)) < 0) {
				perror(optarg);
				badname = TRUE;
			}
			break;
		default:
			errflg = TRUE;
			break;
		}
	}

	if (badname)
		exit(2);

	/* if -a or -e was specified, force tracing of exec() */
	if (aflag || eflag)
		praddset(&trace, SYS_execve);

	/*
	 * Make sure that all system calls, signals, and machine faults
	 * that hang the process are added to their trace sets.
	 */
	prorset(&trace, &syshang);
	prorset(&signals, &sighang);
	prorset(&faults, &flthang);

	argc -= optind;
	argv += optind;

	/*
	 * collect the specified process ids
	 * The loop consumes every remaining argument and leaves argc at -1,
	 * so the "argc > 0" (create a process) test further down is false
	 * whenever -p was given with arguments.
	 * NOTE(review): 'ngrab' is not modified in this function; presumably
	 * pids() counts the entries it stores in grab[] -- confirm.
	 */
	if (pflag && argc > 0) {
		grab = my_malloc(argc * sizeof (proc_set_t),
		    "memory for process-ids");
		while (argc-- > 0)
			pids(*argv++, grab);
	}

	if (errflg || (argc <= 0 && ngrab <= 0)) {
		(void) fprintf(stderr,
	"usage:\t%s [-fcaeildDEF] [-[tTvx] [!]syscalls] [-[sS] [!]signals]\\\n",
		    command);
		(void) fprintf(stderr,
	"\t[-[mM] [!]faults] [-[rw] [!]fds] [-[uU] [!]libs:[:][!]funcs]\\\n");
		(void) fprintf(stderr,
		    "\t[-o outfile]  command | -p pid[/lwps] ...\n");
		exit(2);
	}

	if (argc > 0) {		/* create the controlled process */
		int err;
		char path[PATH_MAX];

		Proc = Pcreate(argv[0], &argv[0], &err, path, sizeof (path));
		if (Proc == NULL) {
			switch (err) {
			case C_PERM:
				(void) fprintf(stderr,
				    "%s: cannot trace set-id or "
				    "unreadable object file: %s\n",
				    command, path);
				break;
			case C_LP64:
				(void) fprintf(stderr,
				    "%s: cannot control _LP64 "
				    "program: %s\n",
				    command, path);
				break;
			case C_NOEXEC:
				(void) fprintf(stderr,
				    "%s: cannot execute program: %s\n",
				    command, argv[0]);
				break;
			case C_NOENT:
				(void) fprintf(stderr,
				    "%s: cannot find program: %s\n",
				    command, argv[0]);
				break;
			case C_STRANGE:
				/* no message is printed for this code */
				break;
			default:
				(void) fprintf(stderr, "%s: %s\n",
				    command, Pcreate_error(err));
				break;
			}
			exit(2);
		}
		if (fflag || Dynpat != NULL)
			(void) Psetflags(Proc, PR_FORK);
		else
			(void) Punsetflags(Proc, PR_FORK);
		Psp = Pstatus(Proc);
		Lsp = &Psp->pr_lwp;
		pri->lwpstat = Lsp;
		data_model = Psp->pr_dmodel;
		created = Psp->pr_pid;
		make_pname(pri, 0);
		(void) sysentry(pri, 1);
		pri->length = 0;
		/*
		 * Save a copy of the formatted system call string in
		 * exec_string and then clear sys_string.
		 */
		if (!cflag && prismember(&trace, SYS_execve)) {
			pri->exec_string = my_realloc(pri->exec_string,
			    strlen(pri->sys_string) + 1, NULL);
			(void) strcpy(pri->exec_pname, pri->pname);
			(void) strcpy(pri->exec_string, pri->sys_string);
			pri->length += strlen(pri->sys_string);
			pri->exec_lwpid = pri->lwpstat->pr_lwpid;
			pri->sys_leng = 0;
			*pri->sys_string = '\0';
		}
		pri->syslast = Psp->pr_stime;
		pri->usrlast = Psp->pr_utime;
	}

	/*
	 * Now that we have created the victim process,
	 * give ourself a million file descriptors.
	 * This is enough to deal with a multithreaded
	 * victim process that has half a million lwps.
	 */
	rlim.rlim_cur = 1024 * 1024;
	rlim.rlim_max = 1024 * 1024;
	if ((Euid != 0 || setrlimit(RLIMIT_NOFILE, &rlim) != 0) &&
	    getrlimit(RLIMIT_NOFILE, &rlim) == 0) {
		/*
		 * Failing the million, give ourself as many
		 * file descriptors as we can get.
		 */
		rlim.rlim_cur = rlim.rlim_max;
		(void) setrlimit(RLIMIT_NOFILE, &rlim);
	}
	(void) enable_extended_FILE_stdio(-1, -1);

	setoutput(ofd);		/* establish truss output */
	istty = isatty(1);

	if (setvbuf(stdout, (char *)NULL, _IOFBF, MYBUFSIZ) != 0)
		abend("setvbuf() failure", NULL);

	/*
	 * Set up signal dispositions.
	 * In the "receive" case intr() is installed only for signals whose
	 * previous disposition was SIG_DFL; any other previous disposition
	 * is replaced by SIG_IGN as a side effect of the probing sigset().
	 */
	if (created && (oflag || !istty)) {	/* ignore interrupts */
		(void) sigset(SIGHUP, SIG_IGN);
		(void) sigset(SIGINT, SIG_IGN);
		(void) sigset(SIGQUIT, SIG_IGN);
	} else {				/* receive interrupts */
		if (sigset(SIGHUP, SIG_IGN) == SIG_DFL)
			(void) sigset(SIGHUP, intr);
		if (sigset(SIGINT, SIG_IGN) == SIG_DFL)
			(void) sigset(SIGINT, intr);
		if (sigset(SIGQUIT, SIG_IGN) == SIG_DFL)
			(void) sigset(SIGQUIT, intr);
	}
	(void) sigset(SIGTERM, intr);
	(void) sigset(SIGUSR1, intr);
	(void) sigset(SIGUSR2, intr);
	(void) sigset(SIGPIPE, intr);

	/* don't accumulate zombie children */
	(void) sigset(SIGCLD, SIG_IGN);

	/*
	 * create shared mem space for global mutexes
	 * The mapping is shared (and the locks process-shared) only when
	 * more than one truss process may be involved: following forks,
	 * function-call tracing, or more than one grabbed process.
	 */

	sharedmem = (fflag || Dynpat != NULL || ngrab > 1);
	gps = (void *)mmap(NULL, sizeof (struct global_psinfo),
	    PROT_READ|PROT_WRITE,
	    MAP_ANON | (sharedmem? MAP_SHARED : MAP_PRIVATE),
	    -1, (off_t)0);
	if (gps == MAP_FAILED)
		abend("cannot allocate ", "memory for counts");
	i = sharedmem? USYNC_PROCESS : USYNC_THREAD;
	(void) mutex_init(&gps->ps_mutex0, i, NULL);
	(void) mutex_init(&gps->ps_mutex1, i, NULL);
	(void) mutex_init(&gps->fork_lock, i, NULL);
	(void) cond_init(&gps->fork_cv, i, NULL);


	/*
	 * config tmp file if counting and following
	 * The file is unlinked right after it is opened, so only the
	 * descriptor 'sfd' refers to it afterwards.
	 * NOTE(review): the tempnam() result is passed to open() without a
	 * NULL check; a NULL return is only caught indirectly, if at all,
	 * by the open() failure test -- consider checking it explicitly.
	 */
	if (fflag && cflag) {
		char *tmps = tempnam("/var/tmp", "truss");
		sfd = open(tmps, O_CREAT|O_APPEND|O_EXCL|O_RDWR, 0600);
		if (sfd == -1)
			abend("Error creating tmpfile", NULL);
		if (unlink(tmps) == -1)
			abend("Error unlinking tmpfile", NULL);
		free(tmps);
		tmps = NULL;
	}

	if (created) {
		per_proc_init();
		procadd(created, NULL);
		show_cred(pri, TRUE, FALSE);
	} else {		/* grab the specified processes */
		int gotone = FALSE;

		i = 0;
		while (i < ngrab) {		/* grab first process */
			if (grabit(pri, &grab[i++])) {
				Psp = Pstatus(Proc);
				Lsp = &Psp->pr_lwp;
				gotone = TRUE;
				break;
			}
		}
		if (!gotone)
			abend(NULL, NULL);
		per_proc_init();
		/*
		 * This process keeps the first process it managed to grab.
		 * For each remaining pid a child is forked; the child drops
		 * the inherited Proc handle, grabs its own target and leaves
		 * the loop, while the parent continues with the next pid.
		 */
		while (i < ngrab) {		/* grab the remainder */
			proc_set_t *set = &grab[i++];

			(void) mutex_lock(&truss_lock);
			switch (fork()) {
			case -1:
				(void) fprintf(stderr,
			"%s: cannot fork to control process, pid# %d\n",
				    command, (int)set->pid);
				/* FALLTHROUGH */
			default:
				(void) mutex_unlock(&truss_lock);
				continue;	/* parent carries on */

			case 0:			/* child grabs process */
				(void) mutex_unlock(&truss_lock);
				Pfree(Proc);
				descendent = TRUE;
				if (grabit(pri, set)) {
					Psp = Pstatus(Proc);
					Lsp = &Psp->pr_lwp;
					per_proc_init();
					break;
				}
				exit(2);
			}
			/* only a child that grabbed its process gets here */
			break;
		}
		free(grab);
	}


	/*
	 * If running setuid-root, become root for real to avoid
	 * affecting the per-user limitation on the maximum number
	 * of processes (one benefit of running setuid-root).
	 */
	if (Rgid != Egid)
		(void) setgid(Egid);
	if (Ruid != Euid)
		(void) setuid(Euid);

	if (!created && aflag && prismember(&trace, SYS_execve)) {
		psargs(pri);
		Flush();
	}

	if (created && Pstate(Proc) != PS_STOP)	/* assertion */
		if (!(interrupt | sigusr1))
			abend("ASSERT error: process is not stopped", NULL);

	traceeven = trace;		/* trace these system calls */

	/* trace these regardless, even if we don't report results */
	praddset(&traceeven, SYS_exit);
	praddset(&traceeven, SYS_lwp_create);
	praddset(&traceeven, SYS_lwp_exit);
	praddset(&traceeven, SYS_execve);
	praddset(&traceeven, SYS_openat);
	praddset(&traceeven, SYS_openat64);
	praddset(&traceeven, SYS_open);
	praddset(&traceeven, SYS_open64);
	praddset(&traceeven, SYS_vfork);
	praddset(&traceeven, SYS_forksys);

	/* for I/O buffer dumps, force tracing of read()s and write()s */
	if (!isemptyset(&readfd)) {
		praddset(&traceeven, SYS_read);
		praddset(&traceeven, SYS_readv);
		praddset(&traceeven, SYS_pread);
		praddset(&traceeven, SYS_pread64);
		praddset(&traceeven, SYS_recv);
		praddset(&traceeven, SYS_recvfrom);
		praddset(&traceeven, SYS_recvmsg);
	}
	if (!isemptyset(&writefd)) {
		praddset(&traceeven, SYS_write);
		praddset(&traceeven, SYS_writev);
		praddset(&traceeven, SYS_pwrite);
		praddset(&traceeven, SYS_pwrite64);
		praddset(&traceeven, SYS_send);
		praddset(&traceeven, SYS_sendto);
		praddset(&traceeven, SYS_sendmsg);
	}

	/* the full set is traced on entry only for -c or -E */
	if (cflag || Eflag) {
		Psetsysentry(Proc, &traceeven);
	}
	Psetsysexit(Proc, &traceeven);

	/* special case -- cannot trace sysexit because context is changed */
	if (prismember(&trace, SYS_context)) {
		(void) Psysentry(Proc, SYS_context, TRUE);
		(void) Psysexit(Proc, SYS_context, FALSE);
		prdelset(&traceeven, SYS_context);
	}

	/* special case -- trace exec() on entry to get the args */
	(void) Psysentry(Proc, SYS_execve, TRUE);

	/* special case -- sysexit never reached */
	(void) Psysentry(Proc, SYS_exit, TRUE);
	(void) Psysentry(Proc, SYS_lwp_exit, TRUE);
	(void) Psysexit(Proc, SYS_exit, FALSE);
	(void) Psysexit(Proc, SYS_lwp_exit, FALSE);

	Psetsignal(Proc, &signals);	/* trace these signals */
	Psetfault(Proc, &faults);	/* trace these faults */

	/* for function call tracing */
	if (Dynpat != NULL) {
		/* trace these regardless, to deal with function calls */
		(void) Pfault(Proc, FLTBPT, TRUE);
		(void) Pfault(Proc, FLTTRACE, TRUE);

		/* needed for x86 */
		(void) Psetflags(Proc, PR_BPTADJ);

		/*
		 * Find functions and set breakpoints on grabbed process.
		 * A process stopped on exec() gets its breakpoints set below.
		 */
		if ((Lsp->pr_why != PR_SYSENTRY &&
		    Lsp->pr_why != PR_SYSEXIT) ||
		    Lsp->pr_what != SYS_execve) {
			establish_breakpoints();
			establish_stacks();
		}
	}

	/*
	 * Use asynchronous-stop for multithreaded truss.
	 * truss runs one lwp for each lwp in the target process.
	 */
	(void) Psetflags(Proc, PR_ASYNC);

	/* flush out all tracing flags now. */
	Psync(Proc);

	/*
	 * If we grabbed a running process, set it running again.
	 * Since we are tracing lwp_create() and lwp_exit(), the
	 * lwps will not change in the process until we create all
	 * of the truss worker threads.
	 * We leave a created process stopped so its exec() can be reported.
	 */
	first = created? FALSE : TRUE;
	if (!created &&
	    ((Pstate(Proc) == PS_STOP && Lsp->pr_why == PR_REQUESTED) ||
	    (Lsp->pr_flags & PR_DSTOP)))
		first = FALSE;

	main_thread(first);
	/* main_thread() ends in exit(); this return satisfies the signature */
	return (0);
}
777
778/*
779 * Called from main() and from control() after fork().
780 */
781void
782main_thread(int first)
783{
784	private_t *pri = get_private();
785	struct tms tms;
786	int flags;
787	int retc;
788	int i;
789	int count;
790
791	/*
792	 * Block all signals in the main thread.
793	 * Some worker thread will receive signals.
794	 */
795	(void) thr_sigsetmask(SIG_SETMASK, &fillset, NULL);
796
797	/*
798	 * If we are dealing with a previously hung process,
799	 * arrange not to leave it hung on the same system call.
800	 */
801	primary_lwp = (first && Pstate(Proc) == PS_STOP)?
802	    Pstatus(Proc)->pr_lwp.pr_lwpid : 0;
803
804	/*
805	 * Create worker threads to match the lwps in the target process.
806	 */
807	truss_nlwp = 0;
808	truss_maxlwp = 1;
809	truss_lwpid = my_realloc(truss_lwpid, sizeof (lwpid_t), NULL);
810	truss_lwpid[0] = 0;
811	count = 0;
812	(void) Plwp_iter(Proc, create_thread, &count);
813
814	if (count == 0) {
815		(void) printf("(Warning: no matching active LWPs found, "
816		    "waiting)\n");
817		Flush();
818	}
819
820	/*
821	 * Set all of the truss worker threads running now.
822	 */
823	(void) mutex_lock(&truss_lock);
824	for (i = 0; i < truss_maxlwp; i++) {
825		if (truss_lwpid[i])
826			(void) thr_continue(truss_lwpid[i]);
827	}
828	(void) mutex_unlock(&truss_lock);
829
830	/*
831	 * Wait until all worker threads terminate.
832	 */
833	while (thr_join(0, NULL, NULL) == 0)
834		continue;
835
836	(void) Punsetflags(Proc, PR_ASYNC);
837	Psync(Proc);
838	if (sigusr1)
839		letgo(pri);
840	flags = PRELEASE_CLEAR;
841	if (leave_hung)
842		flags |= PRELEASE_HANG;
843	Prelease(Proc, flags);
844
845	procdel();
846	retc = (leave_hung? 0 : wait4all());
847
848	if (!descendent) {
849		interrupt = 0;	/* another interrupt kills the report */
850		if (cflag) {
851			if (fflag)
852				file_to_parent();
853			report(pri, times(&tms) - starttime);
854		}
855	} else if (cflag && fflag) {
856		child_to_file();
857	}
858
859	exit(retc);	/* exit with exit status of created process, else 0 */
860}
861
862void *
863worker_thread(void *arg)
864{
865	struct ps_lwphandle *Lwp = (struct ps_lwphandle *)arg;
866	const pstatus_t *Psp = Pstatus(Proc);
867	const lwpstatus_t *Lsp = Lstatus(Lwp);
868	struct syscount *scp;
869	lwpid_t who = Lsp->pr_lwpid;
870	int first = (who == primary_lwp);
871	private_t *pri = get_private();
872	int req_flag = 0;
873	int leave_it_hung = FALSE;
874	int reset_traps = FALSE;
875	int gcode;
876	int what;
877	int ow_in_effect = 0;
878	long ow_syscall = 0;
879	long ow_subcode = 0;
880	char *ow_string = NULL;
881	sysset_t full_set;
882	sysset_t running_set;
883	int dotrace = lwptrace(Psp->pr_pid, Lsp->pr_lwpid);
884
885	pri->Lwp = Lwp;
886	pri->lwpstat = Lsp;
887	pri->syslast = Lsp->pr_stime;
888	pri->usrlast = Lsp->pr_utime;
889	make_pname(pri, 0);
890
891	prfillset(&full_set);
892
893	/* we were created with all signals blocked; unblock them */
894	(void) thr_sigsetmask(SIG_SETMASK, &emptyset, NULL);
895
896	/*
897	 * Run this loop until the victim lwp terminates or we receive
898	 * a termination condition (leave_hung | interrupt | sigusr1).
899	 */
900	for (;;) {
901		if (interrupt | sigusr1) {
902			(void) Lstop(Lwp, MILLISEC);
903			if (Lstate(Lwp) == PS_RUN)
904				break;
905		}
906		if (Lstate(Lwp) == PS_RUN) {
907			/* millisecond timeout is for sleeping syscalls */
908			uint_t tout = (iflag || req_flag)? 0 : MILLISEC;
909
910			/*
911			 * If we are to leave this lwp stopped in sympathy
912			 * with another lwp that has been left hung, or if
913			 * we have been interrupted or instructed to release
914			 * our victim process, and this lwp is stopped but
915			 * not on an event of interest to /proc, then just
916			 * leave it in that state.
917			 */
918			if ((leave_hung | interrupt | sigusr1) &&
919			    (Lsp->pr_flags & (PR_STOPPED|PR_ISTOP))
920			    == PR_STOPPED)
921				break;
922
923			(void) Lwait(Lwp, tout);
924			if (Lstate(Lwp) == PS_RUN &&
925			    tout != 0 && !(interrupt | sigusr1)) {
926				(void) mutex_lock(&truss_lock);
927				if ((Lsp->pr_flags & PR_STOPPED) &&
928				    Lsp->pr_why == PR_JOBCONTROL)
929					req_flag = jobcontrol(pri, dotrace);
930				else
931					req_flag = requested(pri, req_flag,
932					    dotrace);
933				(void) mutex_unlock(&truss_lock);
934			}
935			continue;
936		}
937		data_model = Psp->pr_dmodel;
938		if (Lstate(Lwp) == PS_UNDEAD)
939			break;
940		if (Lstate(Lwp) == PS_LOST) {	/* we lost control */
941			/*
942			 * After exec(), only one LWP remains in the process.
943			 * /proc makes the thread following that LWP receive
944			 * EAGAIN (PS_LOST) if the program being exec()ed
945			 * is a set-id program.  Every other controlling
946			 * thread receives ENOENT (because its LWP vanished).
947			 * We are the controlling thread for the exec()ing LWP.
948			 * We must wait until all of our siblings terminate
949			 * before attempting to reopen the process.
950			 */
951			(void) mutex_lock(&truss_lock);
952			while (truss_nlwp > 1)
953				(void) cond_wait(&truss_cv, &truss_lock);
954			if (Preopen(Proc) == 0) { /* we got control back */
955				/*
956				 * We have to free and re-grab the LWP.
957				 * The process is guaranteed to be at exit
958				 * from exec() or execve() and have only
959				 * one LWP, namely this one, and the LWP
960				 * is guaranteed to have lwpid == 1.
961				 * This "cannot fail".
962				 */
963				who = 1;
964				Lfree(Lwp);
965				pri->Lwp = Lwp =
966				    Lgrab(Proc, who, &gcode);
967				if (Lwp == NULL)
968					abend("Lgrab error: ",
969					    Lgrab_error(gcode));
970				pri->lwpstat = Lsp = Lstatus(Lwp);
971				(void) mutex_unlock(&truss_lock);
972				continue;
973			}
974
975			/* we really lost it */
976			if (pri->exec_string && *pri->exec_string) {
977				if (pri->exec_pname[0] != '\0')
978					(void) fputs(pri->exec_pname, stdout);
979				timestamp(pri);
980				(void) fputs(pri->exec_string, stdout);
981				(void) fputc('\n', stdout);
982			} else if (pri->length) {
983				(void) fputc('\n', stdout);
984			}
985			if (pri->sys_valid)
986				(void) printf(
987			"%s\t*** cannot trace across exec() of %s ***\n",
988				    pri->pname, pri->sys_path);
989			else
990				(void) printf(
991				"%s\t*** lost control of process ***\n",
992				    pri->pname);
993			pri->length = 0;
994			Flush();
995			(void) mutex_unlock(&truss_lock);
996			break;
997		}
998		if (Lstate(Lwp) != PS_STOP) {
999			(void) fprintf(stderr,
1000			    "%s: state = %d\n", command, Lstate(Lwp));
1001			abend(pri->pname, "uncaught status of subject lwp");
1002		}
1003
1004		make_pname(pri, 0);
1005
1006		(void) mutex_lock(&truss_lock);
1007
1008		what = Lsp->pr_what;
1009		req_flag = 0;
1010
1011		switch (Lsp->pr_why) {
1012		case PR_REQUESTED:
1013			break;
1014		case PR_SIGNALLED:
1015			req_flag = signalled(pri, req_flag, dotrace);
1016			if (Sflag && !first && prismember(&sighang, what))
1017				leave_it_hung = TRUE;
1018			break;
1019		case PR_FAULTED:
1020			if (what == FLTBPT) {
1021				int rval;
1022
1023				(void) Pstop(Proc, 0);
1024				rval = function_trace(pri, first, 0, dotrace);
1025				if (rval == 1)
1026					leave_it_hung = TRUE;
1027				if (rval >= 0)
1028					break;
1029			}
1030			if (faulted(pri, dotrace) &&
1031			    Mflag && !first && prismember(&flthang, what))
1032				leave_it_hung = TRUE;
1033			break;
1034		case PR_JOBCONTROL:	/* can't happen except first time */
1035			req_flag = jobcontrol(pri, dotrace);
1036			break;
1037		case PR_SYSENTRY:
1038			/* protect ourself from operating system error */
1039			if (what <= 0 || what > PRMAXSYS)
1040				what = PRMAXSYS;
1041			pri->length = 0;
			/*
			 * ow_in_effect checks to see whether or not we
			 * are attempting to quantify the time spent in
			 * a one way system call.  This is necessary as
			 * some system calls never return, yet it is desirable
			 * to determine how much time the traced process
			 * spends in these calls.  To do this, a one way
			 * flag is set on SYSENTRY when the call is received.
			 * After this, the call mask for the SYSENTRY events
			 * is filled so that the traced process will stop
			 * on the entry to the very next system call.
			 * This appears to be the best way to determine
			 * system time elapsed between a one way system call.
			 * Once the next call occurs, values that have been
			 * stashed are used to record the correct syscall
			 * and time, and the SYSENTRY event mask is restored
			 * so that the traced process may continue.
			 */
1060			if (dotrace && ow_in_effect) {
1061				if (cflag) {
1062					(void) mutex_lock(&count_lock);
1063					scp = Cp->syscount[ow_syscall];
1064					if (ow_subcode != -1)
1065						scp += ow_subcode;
1066					scp->count++;
1067					accumulate(&scp->stime,
1068					    &Lsp->pr_stime, &pri->syslast);
1069					accumulate(&Cp->usrtotal,
1070					    &Lsp->pr_utime, &pri->usrlast);
1071					pri->syslast = Lsp->pr_stime;
1072					pri->usrlast = Lsp->pr_utime;
1073					(void) mutex_unlock(&count_lock);
1074				} else if (Eflag) {
1075					putpname(pri);
1076					timestamp(pri);
1077					(void) printf("%s\n", ow_string);
1078					free(ow_string);
1079					ow_string = NULL;
1080					pri->syslast = Lsp->pr_stime;
1081				}
1082				ow_in_effect = 0;
1083				Psetsysentry(Proc, &running_set);
1084			}
1085
1086			/*
1087			 * Special cases.  Most syscalls are traced on exit.
1088			 */
1089			switch (what) {
1090			case SYS_exit:			/* exit() */
1091			case SYS_lwp_exit:		/* lwp_exit() */
1092			case SYS_context:		/* [get|set]context() */
1093				if (dotrace && cflag &&
1094				    prismember(&trace, what)) {
1095					ow_in_effect = 1;
1096					ow_syscall = what;
1097					ow_subcode = getsubcode(pri);
1098					pri->syslast = Lsp->pr_stime;
1099					running_set =
1100					    (Pstatus(Proc))->pr_sysentry;
1101					Psetsysentry(Proc, &full_set);
1102				} else if (dotrace && Eflag &&
1103				    prismember(&trace, what)) {
1104					(void) sysentry(pri, dotrace);
1105					ow_in_effect = 1;
1106					ow_string = my_malloc(
1107					    strlen(pri->sys_string) + 1, NULL);
1108					(void) strcpy(ow_string,
1109					    pri->sys_string);
1110					running_set =
1111					    (Pstatus(Proc))->pr_sysentry;
1112					Psetsysentry(Proc, &full_set);
1113					pri->syslast = Lsp->pr_stime;
1114				} else if (dotrace &&
1115				    prismember(&trace, what)) {
1116					(void) sysentry(pri, dotrace);
1117					putpname(pri);
1118					timestamp(pri);
1119					pri->length +=
1120					    printf("%s\n", pri->sys_string);
1121					Flush();
1122				}
1123				pri->sys_leng = 0;
1124				*pri->sys_string = '\0';
1125
1126				if (what == SYS_exit)
1127					exit_called = TRUE;
1128				break;
1129			case SYS_execve:
1130				show_cred(pri, FALSE, TRUE);
1131				(void) sysentry(pri, dotrace);
1132				if (dotrace && !cflag &&
1133				    prismember(&trace, what)) {
1134					pri->exec_string =
1135					    my_realloc(pri->exec_string,
1136					    strlen(pri->sys_string) + 1,
1137					    NULL);
1138					(void) strcpy(pri->exec_pname,
1139					    pri->pname);
1140					(void) strcpy(pri->exec_string,
1141					    pri->sys_string);
1142					pri->length += strlen(pri->sys_string);
1143					pri->exec_lwpid = Lsp->pr_lwpid;
1144				}
1145				pri->sys_leng = 0;
1146				*pri->sys_string = '\0';
1147				break;
1148			default:
1149				if (dotrace && (cflag || Eflag) &&
1150				    prismember(&trace, what)) {
1151					pri->syslast = Lsp->pr_stime;
1152				}
1153				break;
1154			}
1155			if (dotrace && Tflag && !first &&
1156			    (prismember(&syshang, what) ||
1157			    (exit_called && prismember(&syshang, SYS_exit))))
1158				leave_it_hung = TRUE;
1159			break;
1160		case PR_SYSEXIT:
1161			/* check for write open of a /proc file */
1162			if (what == SYS_openat || what == SYS_openat64 ||
1163			    what == SYS_open || what == SYS_open64) {
1164				int readonly;
1165
1166				(void) sysentry(pri, dotrace);
1167				pri->Errno = Lsp->pr_errno;
1168				pri->ErrPriv = Lsp->pr_errpriv;
1169				readonly =
1170				    ((what == SYS_openat ||
1171				    what == SYS_openat64) &&
1172				    pri->sys_nargs > 2 &&
1173				    (pri->sys_args[2]&0x3) == O_RDONLY) ||
1174				    ((what == SYS_open ||
1175				    what == SYS_open64) &&
1176				    pri->sys_nargs > 1 &&
1177				    (pri->sys_args[1]&0x3) == O_RDONLY);
1178				if ((pri->Errno == 0 || pri->Errno == EBUSY) &&
1179				    pri->sys_valid && !readonly) {
1180					int rv = checkproc(pri);
1181					if (rv == 1 && Fflag != PGRAB_FORCE) {
1182						/*
1183						 * The process opened itself
1184						 * and no -F flag was specified.
1185						 * Just print the open() call
1186						 * and let go of the process.
1187						 */
1188						if (dotrace && !cflag &&
1189						    prismember(&trace, what)) {
1190							putpname(pri);
1191							timestamp(pri);
1192							(void) printf("%s\n",
1193							    pri->sys_string);
1194							Flush();
1195						}
1196						sigusr1 = TRUE;
1197						(void) mutex_unlock(
1198						    &truss_lock);
1199						goto out;
1200					}
1201					if (rv == 2) {
1202						/*
1203						 * Process opened someone else.
1204						 * The open is being reissued.
1205						 * Don't report this one.
1206						 */
1207						pri->sys_leng = 0;
1208						*pri->sys_string = '\0';
1209						pri->sys_nargs = 0;
1210						break;
1211					}
1212				}
1213			}
1214			if (what == SYS_execve && pri->Errno == 0) {
1215				/*
1216				 * Refresh the data model on exec() in case it
1217				 * is different from the parent.  Lwait()
1218				 * doesn't update process-wide status, so we
1219				 * have to explicitly call Pstopstatus() to get
1220				 * the new state.
1221				 */
1222				(void) Pstopstatus(Proc, PCNULL, 0);
1223				data_model = Psp->pr_dmodel;
1224			}
1225			if (sysexit(pri, dotrace))
1226				Flush();
1227			if (what == SYS_lwp_create && pri->Rval1 != 0) {
1228				struct ps_lwphandle *new_Lwp;
1229				lwpid_t lwpid;
1230
1231				if ((new_Lwp = grab_lwp(pri->Rval1)) != NULL) {
1232					(void) thr_sigsetmask(SIG_SETMASK,
1233					    &fillset, NULL);
1234					if (thr_create(NULL, 0, worker_thread,
1235					    new_Lwp, THR_BOUND | THR_SUSPENDED,
1236					    &lwpid) != 0)
1237						abend("cannot create lwp ",
1238						    "to follow child lwp");
1239					insert_lwpid(lwpid);
1240					(void) thr_continue(lwpid);
1241					(void) thr_sigsetmask(SIG_SETMASK,
1242					    &emptyset, NULL);
1243				}
1244			}
1245			pri->sys_nargs = 0;
1246			if (dotrace && Tflag && !first &&
1247			    prismember(&syshang, what))
1248				leave_it_hung = TRUE;
1249			if (what == SYS_execve && pri->Errno == 0) {
1250				is_vfork_child = FALSE;
1251				reset_breakpoints();
1252				/*
1253				 * exec() resets the calling LWP's lwpid to 1.
1254				 * If the LWP has changed its lwpid, then
1255				 * we have to free and re-grab the LWP
1256				 * in order to keep libproc consistent.
1257				 * This "cannot fail".
1258				 */
1259				if (who != Lsp->pr_lwpid) {
1260					/*
1261					 * We must wait for all of our
1262					 * siblings to terminate.
1263					 */
1264					while (truss_nlwp > 1)
1265						(void) cond_wait(&truss_cv,
1266						    &truss_lock);
1267					who = Lsp->pr_lwpid;
1268					Lfree(Lwp);
1269					pri->Lwp = Lwp =
1270					    Lgrab(Proc, who, &gcode);
1271					if (Lwp == NULL)
1272						abend("Lgrab error: ",
1273						    Lgrab_error(gcode));
1274					pri->lwpstat = Lsp = Lstatus(Lwp);
1275				}
1276			}
1277			break;
1278		default:
1279			req_flag = 0;
1280			(void) fprintf(stderr,
1281			    "unknown reason for stopping: %d/%d\n",
1282			    Lsp->pr_why, what);
1283			abend(NULL, NULL);
1284		}
1285
1286		if (pri->child) {	/* controlled process fork()ed */
1287			if (fflag || Dynpat != NULL)  {
1288				if (Lsp->pr_why == PR_SYSEXIT &&
1289				    (Lsp->pr_what == SYS_vfork ||
1290				    (Lsp->pr_what == SYS_forksys &&
1291				    Lsp->pr_sysarg[0] == 2))) {
1292					is_vfork_child = TRUE;
1293					(void) Pstop(Proc, 0);
1294				}
1295				if (control(pri, pri->child)) {
1296					(void) mutex_unlock(&truss_lock);
1297					pri->child = 0;
1298					if (!fflag) {
1299						/*
1300						 * If this is vfork(), then
1301						 * this clears the breakpoints
1302						 * in the parent's address space
1303						 * as well as in the child's.
1304						 */
1305						clear_breakpoints();
1306						Prelease(Proc, PRELEASE_CLEAR);
1307						_exit(0);
1308					}
1309					main_thread(FALSE);
1310					/* NOTREACHED */
1311				}
1312
1313				/*
1314				 * Here, we are still the parent truss.
1315				 * If the child messes with the breakpoints and
1316				 * this is vfork(), we have to set them again.
1317				 */
1318				if (Dynpat != NULL && is_vfork_child && !fflag)
1319					reset_traps = TRUE;
1320				is_vfork_child = FALSE;
1321			}
1322			pri->child = 0;
1323		}
1324
1325		if (leave_it_hung) {
1326			(void) mutex_unlock(&truss_lock);
1327			break;
1328		}
1329
1330		if (reset_traps) {
1331			/*
1332			 * To recover from vfork, we must catch the lwp
1333			 * that issued the vfork() when it returns to user
1334			 * level, with all other lwps remaining stopped.
1335			 * For this purpose, we have directed all lwps to
1336			 * stop and we now set the vfork()ing lwp running
1337			 * with the PRSTEP flag.  We expect to capture it
1338			 * when it stops again showing PR_FAULTED/FLTTRACE.
1339			 * We are holding truss_lock, so no other threads
1340			 * in truss will set any other lwps in the victim
1341			 * process running.
1342			 */
1343			reset_traps = FALSE;
1344			(void) Lsetrun(Lwp, 0, PRSTEP);
1345			do {
1346				(void) Lwait(Lwp, 0);
1347			} while (Lstate(Lwp) == PS_RUN);
1348			if (Lstate(Lwp) == PS_STOP &&
1349			    Lsp->pr_why == PR_FAULTED &&
1350			    Lsp->pr_what == FLTTRACE) {
1351				reestablish_traps();
1352				(void) Lsetrun(Lwp, 0, PRCFAULT|PRSTOP);
1353			} else {
1354				(void) printf("%s\t*** Expected PR_FAULTED/"
1355				    "FLTTRACE stop following vfork()\n",
1356				    pri->pname);
1357			}
1358		}
1359
1360		if (Lstate(Lwp) == PS_STOP) {
1361			int flags = 0;
1362
1363			if (interrupt | sigusr1) {
1364				(void) mutex_unlock(&truss_lock);
1365				break;
1366			}
			/*
			 * If we must leave this lwp hung in sympathy with
			 * another lwp that is being left hung on purpose,
			 * then push the state onward toward PR_REQUESTED.
			 */
1372			if (leave_hung) {
1373				if (Lsp->pr_why == PR_REQUESTED) {
1374					(void) mutex_unlock(&truss_lock);
1375					break;
1376				}
1377				flags |= PRSTOP;
1378			}
1379			if (Lsetrun(Lwp, 0, flags) != 0 &&
1380			    Lstate(Lwp) != PS_LOST &&
1381			    Lstate(Lwp) != PS_UNDEAD) {
1382				(void) mutex_unlock(&truss_lock);
1383				perror("Lsetrun");
1384				abend("cannot start subject lwp", NULL);
1385				/* NOTREACHED */
1386			}
1387		}
1388		first = FALSE;
1389
1390		(void) mutex_unlock(&truss_lock);
1391	}
1392
1393out:
1394	/* block all signals in preparation for exiting */
1395	(void) thr_sigsetmask(SIG_SETMASK, &fillset, NULL);
1396
1397	if (Lstate(Lwp) == PS_UNDEAD || Lstate(Lwp) == PS_LOST)
1398		(void) mutex_lock(&truss_lock);
1399	else {
1400		(void) Lstop(Lwp, MILLISEC);
1401		(void) mutex_lock(&truss_lock);
1402		if (Lstate(Lwp) == PS_STOP &&
1403		    Lsp->pr_why == PR_FAULTED &&
1404		    Lsp->pr_what == FLTBPT)
1405			(void) function_trace(pri, 0, 1, dotrace);
1406	}
1407
1408	if (dotrace && ow_in_effect) {
1409		if (cflag) {
1410			(void) mutex_lock(&count_lock);
1411			scp = Cp->syscount[ow_syscall];
1412			if (ow_subcode != -1)
1413				scp += ow_subcode;
1414			scp->count++;
1415			accumulate(&scp->stime,
1416			    &Lsp->pr_stime, &pri->syslast);
1417			accumulate(&Cp->usrtotal,
1418			    &Lsp->pr_utime, &pri->usrlast);
1419			pri->syslast = Lsp->pr_stime;
1420			pri->usrlast = Lsp->pr_utime;
1421			(void) mutex_unlock(&count_lock);
1422		} else if (Eflag) {
1423			putpname(pri);
1424			timestamp(pri);
1425			(void) printf("%s\n", ow_string);
1426			free(ow_string);
1427			ow_string = NULL;
1428			pri->syslast = Lsp->pr_stime;
1429		}
1430		ow_in_effect = 0;
1431		Psetsysentry(Proc, &running_set);
1432	}
1433
1434	if (Lstate(Lwp) == PS_UNDEAD || Lstate(Lwp) == PS_LOST) {
1435		/*
1436		 * The victim thread has exited or we lost control of
1437		 * the process.  Remove ourself from the list of all
1438		 * truss threads and notify everyone waiting for this.
1439		 */
1440		lwpid_t my_id = thr_self();
1441		int i;
1442
1443		for (i = 0; i < truss_maxlwp; i++) {
1444			if (truss_lwpid[i] == my_id) {
1445				truss_lwpid[i] = 0;
1446				break;
1447			}
1448		}
1449		if (--truss_nlwp != 0) {
1450			(void) cond_broadcast(&truss_cv);
1451		} else {
1452			/*
1453			 * The last truss worker thread is terminating.
1454			 * The address space is gone (UNDEAD) or is
1455			 * inaccessible (LOST) so we cannot clear the
1456			 * breakpoints.  Just report the htable stats.
1457			 */
1458			report_htable_stats();
1459		}
1460	} else {
1461		/*
1462		 * The victim thread is not a zombie thread, and we have not
1463		 * lost control of the process.  We must have gotten here due
1464		 * to (leave_hung || leave_it_hung || interrupt || sigusr1).
1465		 * In these cases, we must carefully uninstrument the process
1466		 * and either set it running or leave it stopped and abandoned.
1467		 */
1468		static int nstopped = 0;
1469		static int cleared = 0;
1470
1471		if (leave_it_hung)
1472			leave_hung = TRUE;
1473		if ((leave_hung | interrupt | sigusr1) == 0)
1474			abend("(leave_hung | interrupt | sigusr1) == 0", NULL);
1475
1476		/*
1477		 * The first truss thread through here needs to instruct all
1478		 * application threads to stop -- they're not necessarily
1479		 * going to stop on their own.
1480		 */
1481		if (nstopped++ == 0)
1482			(void) Pdstop(Proc);
1483
1484		/*
1485		 * Notify all other worker threads about the reason
1486		 * for being here (leave_hung || interrupt || sigusr1).
1487		 */
1488		broadcast_signals();
1489
		/*
		 * Once the last thread has reached this point, then and
		 * only then is it safe to remove breakpoints and other
		 * instrumentation.  Since breakpoints are executed without
		 * truss_lock held, a monitor thread can't exit until all
		 * breakpoints have been removed, and we can't be sure the
		 * procedure to execute a breakpoint won't temporarily
		 * reinstall a breakpoint.  Accordingly, we need to wait
		 * until all threads are in a known state.
		 */
1500		while (nstopped != truss_nlwp)
1501			(void) cond_wait(&truss_cv, &truss_lock);
1502
1503		/*
1504		 * All truss threads have reached this point.
1505		 * One of them clears the breakpoints and
1506		 * wakes up everybody else to finish up.
1507		 */
1508		if (cleared++ == 0) {
1509			/*
1510			 * All threads should already be stopped,
1511			 * but just to be safe...
1512			 */
1513			(void) Pstop(Proc, MILLISEC);
1514			clear_breakpoints();
1515			(void) Psysexit(Proc, SYS_vfork, FALSE);
1516			(void) Psysexit(Proc, SYS_forksys, FALSE);
1517			(void) Punsetflags(Proc, PR_FORK);
1518			Psync(Proc);
1519			fflag = 0;
1520			(void) cond_broadcast(&truss_cv);
1521		}
1522
1523		if (!leave_hung && Lstate(Lwp) == PS_STOP)
1524			(void) Lsetrun(Lwp, 0, 0);
1525	}
1526
1527	(void) Lfree(Lwp);
1528	(void) mutex_unlock(&truss_lock);
1529	return (NULL);
1530}
1531
1532/*
1533 * Give a base date for time stamps, adjusted to the
1534 * stop time of the selected (first or created) process.
1535 */
1536void
1537setup_basetime(hrtime_t basehrtime, struct timeval *basedate)
1538{
1539	const pstatus_t *Psp = Pstatus(Proc);
1540	(void) mutex_lock(&count_lock);
1541	Cp->basetime = Psp->pr_lwp.pr_tstamp;
1542	(void) mutex_unlock(&count_lock);
1543
1544	if ((dflag|Dflag) && !cflag) {
1545		const struct tm *ptm;
1546		const char *ptime;
1547		const char *pdst;
1548		hrtime_t delta = basehrtime -
1549		    ((hrtime_t)Cp->basetime.tv_sec * NANOSEC +
1550		    Cp->basetime.tv_nsec);
1551
1552		if (delta > 0) {
1553			basedate->tv_sec -= (time_t)(delta / NANOSEC);
1554			basedate->tv_usec -= (delta % NANOSEC) / 1000;
1555			if (basedate->tv_usec < 0) {
1556				basedate->tv_sec--;
1557				basedate->tv_usec += MICROSEC;
1558			}
1559		}
1560		ptm = localtime(&basedate->tv_sec);
1561		ptime = asctime(ptm);
1562		if ((pdst = tzname[ptm->tm_isdst ? 1 : 0]) == NULL)
1563			pdst = "???";
1564		if (dflag) {
1565			(void) printf(
1566			    "Base time stamp:  %ld.%4.4ld  [ %.20s%s %.4s ]\n",
1567			    basedate->tv_sec, basedate->tv_usec / 100,
1568			    ptime, pdst, ptime + 20);
1569			Flush();
1570		}
1571	}
1572}
1573
1574/*
1575 * Performs per-process initializations. If truss is following a victim
1576 * process it will fork additional truss processes to follow new processes
1577 * created.  Here is where each new truss process gets its per-process data
1578 * initialized.
1579 */
1580
1581void
1582per_proc_init()
1583{
1584	void *pmem;
1585	struct timeval basedate;
1586	hrtime_t basehrtime;
1587	struct syscount *scp;
1588	int i;
1589	timestruc_t c_basetime;
1590
1591	/* Make sure we only configure the basetime for the first truss proc */
1592
1593	if (Cp == NULL) {
1594		pmem = my_malloc(sizeof (struct counts) + maxsyscalls() *
1595		    sizeof (struct syscount), NULL);
1596		Cp = (struct counts *)pmem;
1597		basehrtime = gethrtime();
1598		(void) gettimeofday(&basedate, NULL);
1599		setup_basetime(basehrtime, &basedate);
1600	}
1601
1602	c_basetime = Cp->basetime;
1603
1604	(void) memset(Cp, 0, sizeof (struct counts) + maxsyscalls() *
1605	    sizeof (struct syscount));
1606
1607	Cp->basetime = c_basetime;
1608
1609	if (fcall_tbl != NULL)
1610		destroy_hash(fcall_tbl);
1611	fcall_tbl = init_hash(4096);
1612
1613	(void) mutex_lock(&count_lock);
1614	scp = (struct syscount *)(Cp + 1);
1615	for (i = 0; i <= PRMAXSYS; i++) {
1616		Cp->syscount[i] = scp;
1617		scp += nsubcodes(i);
1618	}
1619	(void) mutex_unlock(&count_lock);
1620}
1621
1622
1623/*
1624 * Writes child state to a tempfile where it can be read and
1625 * accumulated by the parent process. The file descriptor is shared
1626 * among the processes.  Ordering of writes does not matter, it is, however,
1627 * necessary to ensure that all writes are atomic.
1628 */
1629
1630void
1631child_to_file()
1632{
1633	hiter_t *itr;
1634	hentry_t *ntry;
1635	hdntry_t fentry;
1636	char *s = NULL;
1637	char *t = NULL;
1638	unsigned char *buf = NULL;
1639	size_t bufsz = 0;
1640	size_t i = 0;
1641	size_t j = 0;
1642
1643	/* ensure that we are in fact a child process */
1644	if (!descendent)
1645		return;
1646
1647	/* enumerate fcall_tbl (tbl locked until freed) */
1648	if (Dynpat != NULL) {
1649		itr = iterate_hash(fcall_tbl);
1650
1651		ntry = iter_next(itr);
1652		while (ntry != NULL) {
1653			fentry.type = HD_hashntry;
1654			fentry.count = ntry->count;
1655			s = ntry->key;
1656			t = ntry->lib;
1657			i = strlen(s) + 1;
1658			j = strlen(t) + 1;
1659			fentry.sz_key = i;
1660			fentry.sz_lib = j;
1661			if (i + sizeof (fentry) > bufsz) {
1662				buf = my_realloc(buf, i + j + sizeof (fentry),
1663				    NULL);
1664				bufsz = i + j + sizeof (fentry);
1665			}
1666			(void) memcpy(buf, &fentry, sizeof (fentry));
1667			(void) strlcpy((char *)(buf + sizeof (fentry)), t, j);
1668			(void) strlcpy((char *)(buf + sizeof (fentry) + j),
1669			    s, i);
1670			if (write(sfd, buf, sizeof (fentry) + i + j) == -1)
1671				abend("Error writing to tmp file", NULL);
1672			ntry = iter_next(itr);
1673		}
1674		iter_free(itr);
1675	}
1676
1677	/* Now write the count/syscount structs down */
1678	bufsz = sizeof (fentry) + (sizeof (struct counts) + maxsyscalls() *
1679	    sizeof (struct syscount));
1680	buf = my_realloc(buf, bufsz, NULL);
1681	fentry.type = HD_cts_syscts;
1682	fentry.count = 0;	/* undefined, really */
1683	fentry.sz_key = bufsz - sizeof (fentry);
1684	fentry.sz_lib = 0;	/* also undefined */
1685	(void) memcpy(buf, &fentry, sizeof (fentry));
1686	(void) memcpy((char *)(buf + sizeof (fentry)), Cp,
1687	    bufsz - sizeof (fentry));
1688	if (write(sfd, buf, bufsz) == -1)
1689		abend("Error writing cts/syscts to tmpfile", NULL);
1690
1691	free(buf);
1692}
1693
1694/*
1695 * The following reads entries from the tempfile back to the parent
1696 * so that information can be collected and summed for overall statistics.
1697 * This reads records out of the tempfile.  If they are hash table entries,
1698 * the record is merged with the hash table kept by the parent process.
1699 * If the information is a struct count/struct syscount pair, they are
1700 * copied and added into the count/syscount array kept by the parent.
1701 */
1702
1703void
1704file_to_parent()
1705{
1706	hdntry_t ntry;
1707	char *s = NULL;
1708	char *t = NULL;
1709	size_t c_offset = 0;
1710	size_t filesz;
1711	size_t t_strsz = 0;
1712	size_t s_strsz = 0;
1713	struct stat fsi;
1714
1715	if (descendent)
1716		return;
1717
1718	if (fstat(sfd, &fsi) == -1)
1719		abend("Error stat-ing tempfile", NULL);
1720	filesz = fsi.st_size;
1721
1722	while (c_offset < filesz) {
1723		/* first get hdntry */
1724		if (pread(sfd, &ntry, sizeof (hdntry_t), c_offset) !=
1725		    sizeof (hdntry_t))
1726			abend("Unable to perform full read of hdntry", NULL);
1727		c_offset += sizeof (hdntry_t);
1728
1729		switch (ntry.type) {
1730		case HD_hashntry:
1731
1732			/* first get lib string */
1733			if (ntry.sz_lib > t_strsz) {
1734				t = my_realloc(t, ntry.sz_lib, NULL);
1735				t_strsz = ntry.sz_lib;
1736			}
1737
1738			(void) memset(t, 0, t_strsz);
1739
1740			/* now actually get the string */
1741			if (pread(sfd, t, ntry.sz_lib, c_offset) != ntry.sz_lib)
1742				abend("Unable to perform full read of lib str",
1743				    NULL);
1744			c_offset += ntry.sz_lib;
1745
1746			/* now get key string */
1747
1748			if (ntry.sz_key > s_strsz) {
1749				s = my_realloc(s, ntry.sz_key, NULL);
1750				s_strsz = ntry.sz_key;
1751			}
1752			(void) memset(s, 0, s_strsz);
1753			if (pread(sfd, s, ntry.sz_key, c_offset) != ntry.sz_key)
1754				abend("Unable to perform full read of key str",
1755				    NULL);
1756			c_offset += ntry.sz_key;
1757
1758			add_fcall(fcall_tbl, t, s, ntry.count);
1759			break;
1760
1761		case HD_cts_syscts:
1762		{
1763			struct counts *ncp;
1764			size_t bfsz = sizeof (struct counts) + maxsyscalls()
1765			    * sizeof (struct syscount);
1766			int i;
1767			struct syscount *sscp;
1768
1769			if (ntry.sz_key != bfsz)
1770				abend("cts/syscts size does not sanity check",
1771				    NULL);
1772			ncp = my_malloc(ntry.sz_key, NULL);
1773
1774			if (pread(sfd, ncp, ntry.sz_key, c_offset) !=
1775			    ntry.sz_key)
1776				abend("Unable to perform full read of cts",
1777				    NULL);
1778			c_offset += ntry.sz_key;
1779
1780			sscp = (struct syscount *)(ncp + 1);
1781
1782			(void) mutex_lock(&count_lock);
1783
1784			Cp->usrtotal.tv_sec += ncp->usrtotal.tv_sec;
1785			Cp->usrtotal.tv_nsec += ncp->usrtotal.tv_nsec;
1786			if (Cp->usrtotal.tv_nsec >= NANOSEC) {
1787				Cp->usrtotal.tv_nsec -= NANOSEC;
1788				Cp->usrtotal.tv_sec++;
1789			}
1790			for (i = 0; i <= PRMAXSYS; i++) {
1791				ncp->syscount[i] = sscp;
1792				sscp += nsubcodes(i);
1793			}
1794
1795			for (i = 0; i <= PRMAXFAULT; i++) {
1796				Cp->fltcount[i] += ncp->fltcount[i];
1797			}
1798
1799			for (i = 0; i <= PRMAXSIG; i++) {
1800				Cp->sigcount[i] += ncp->sigcount[i];
1801			}
1802
1803			for (i = 0; i <= PRMAXSYS; i++) {
1804				struct syscount *scp = Cp->syscount[i];
1805				struct syscount *nscp = ncp->syscount[i];
1806				int n = nsubcodes(i);
1807				int subcode;
1808
1809				for (subcode = 0; subcode < n; subcode++,
1810				    scp++, nscp++) {
1811					scp->count += nscp->count;
1812					scp->error += nscp->error;
1813					scp->stime.tv_sec += nscp->stime.tv_sec;
1814					scp->stime.tv_nsec +=
1815					    nscp->stime.tv_nsec;
1816					if (scp->stime.tv_nsec >= NANOSEC) {
1817						scp->stime.tv_nsec -= NANOSEC;
1818						scp->stime.tv_sec++;
1819					}
1820				}
1821			}
1822			(void) mutex_unlock(&count_lock);
1823			free(ncp);
1824			break;
1825		}
1826		default:
1827
1828			abend("Unknown file entry type encountered", NULL);
1829			break;
1830
1831		}
1832
1833		if (fstat(sfd, &fsi) == -1)
1834			abend("Error stat-ing tempfile", NULL);
1835		filesz = fsi.st_size;
1836	}
1837	if (s != NULL)
1838		free(s);
1839	if (t != NULL)
1840		free(t);
1841}
1842
1843void
1844make_pname(private_t *pri, id_t tid)
1845{
1846	if (!cflag) {
1847		int ff = (fflag || ngrab > 1);
1848		int lf = (lflag | tid | (Thr_agent != NULL) | (truss_nlwp > 1));
1849		pid_t pid = Pstatus(Proc)->pr_pid;
1850		id_t lwpid = pri->lwpstat->pr_lwpid;
1851
1852		if (ff != pri->pparam.ff ||
1853		    lf != pri->pparam.lf ||
1854		    pid != pri->pparam.pid ||
1855		    lwpid != pri->pparam.lwpid ||
1856		    tid != pri->pparam.tid) {
1857			char *s = pri->pname;
1858
1859			if (ff)
1860				s += sprintf(s, "%d", (int)pid);
1861			if (lf)
1862				s += sprintf(s, "/%d", (int)lwpid);
1863			if (tid)
1864				s += sprintf(s, "@%d", (int)tid);
1865			if (ff || lf)
1866				*s++ = ':', *s++ = '\t';
1867			if (ff && lf && s < pri->pname + 9)
1868				*s++ = '\t';
1869			*s = '\0';
1870			pri->pparam.ff = ff;
1871			pri->pparam.lf = lf;
1872			pri->pparam.pid = pid;
1873			pri->pparam.lwpid = lwpid;
1874			pri->pparam.tid = tid;
1875		}
1876	}
1877}
1878
1879/*
1880 * Print the pri->pname[] string, if any.
1881 */
1882void
1883putpname(private_t *pri)
1884{
1885	if (pri->pname[0])
1886		(void) fputs(pri->pname, stdout);
1887}
1888
1889/*
1890 * Print the timestamp, if requested (-d, -D, or -E).
1891 */
1892void
1893timestamp(private_t *pri)
1894{
1895	const lwpstatus_t *Lsp = pri->lwpstat;
1896	int seconds;
1897	int fraction;
1898
1899	if (!(dflag|Dflag|Eflag) || !(Lsp->pr_flags & PR_STOPPED))
1900		return;
1901
1902	seconds = Lsp->pr_tstamp.tv_sec - Cp->basetime.tv_sec;
1903	fraction = Lsp->pr_tstamp.tv_nsec - Cp->basetime.tv_nsec;
1904	if (fraction < 0) {
1905		seconds--;
1906		fraction += NANOSEC;
1907	}
1908	/* fraction in 1/10 milliseconds, rounded up */
1909	fraction = (fraction + 50000) / 100000;
1910	if (fraction >= (MILLISEC * 10)) {
1911		seconds++;
1912		fraction -= (MILLISEC * 10);
1913	}
1914
1915	if (dflag)		/* time stamp */
1916		(void) printf("%2d.%4.4d\t", seconds, fraction);
1917
1918	if (Dflag) {		/* time delta */
1919		int oseconds = pri->seconds;
1920		int ofraction = pri->fraction;
1921
1922		pri->seconds = seconds;
1923		pri->fraction = fraction;
1924		seconds -= oseconds;
1925		fraction -= ofraction;
1926		if (fraction < 0) {
1927			seconds--;
1928			fraction += (MILLISEC * 10);
1929		}
1930		(void) printf("%2d.%4.4d\t", seconds, fraction);
1931	}
1932
1933	if (Eflag) {
1934		seconds = Lsp->pr_stime.tv_sec - pri->syslast.tv_sec;
1935		fraction = Lsp->pr_stime.tv_nsec - pri->syslast.tv_nsec;
1936
1937		if (fraction < 0) {
1938			seconds--;
1939			fraction += NANOSEC;
1940		}
1941		/* fraction in 1/10 milliseconds, rounded up */
1942		fraction = (fraction + 50000) / 100000;
1943		if (fraction >= (MILLISEC * 10)) {
1944			seconds++;
1945			fraction -= (MILLISEC * 10);
1946		}
1947		(void) printf("%2d.%4.4d\t", seconds, fraction);
1948	}
1949}
1950
1951/*
1952 * Create output file, being careful about
1953 * suid/sgid and file descriptor 0, 1, 2 issues.
1954 */
1955int
1956xcreat(char *path)
1957{
1958	int fd;
1959	int mode = 0666;
1960
1961	if (Euid == Ruid && Egid == Rgid)	/* not set-id */
1962		fd = creat(path, mode);
1963	else if (access(path, F_OK) != 0) {	/* file doesn't exist */
1964		/* if directory permissions OK, create file & set ownership */
1965
1966		char *dir;
1967		char *p;
1968		char dot[4];
1969
1970		/* generate path for directory containing file */
1971		if ((p = strrchr(path, '/')) == NULL) {	/* no '/' */
1972			p = dir = dot;
1973			*p++ = '.';		/* current directory */
1974			*p = '\0';
1975		} else if (p == path) {			/* leading '/' */
1976			p = dir = dot;
1977			*p++ = '/';		/* root directory */
1978			*p = '\0';
1979		} else {				/* embedded '/' */
1980			dir = path;		/* directory path */
1981			*p = '\0';
1982		}
1983
1984		if (access(dir, W_OK|X_OK) != 0) {
1985			/* not writeable/searchable */
1986			*p = '/';
1987			fd = -1;
1988		} else {	/* create file and set ownership correctly */
1989			*p = '/';
1990			if ((fd = creat(path, mode)) >= 0)
1991				(void) chown(path, (int)Ruid, (int)Rgid);
1992		}
1993	} else if (access(path, W_OK) != 0)	/* file not writeable */
1994		fd = -1;
1995	else
1996		fd = creat(path, mode);
1997
1998	/*
1999	 * Make sure it's not one of 0, 1, or 2.
2000	 * This allows truss to work when spawned by init(1m).
2001	 */
2002	if (0 <= fd && fd <= 2) {
2003		int dfd = fcntl(fd, F_DUPFD, 3);
2004		(void) close(fd);
2005		fd = dfd;
2006	}
2007
2008	/*
2009	 * Mark it close-on-exec so created processes don't inherit it.
2010	 */
2011	if (fd >= 0)
2012		(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
2013
2014	return (fd);
2015}
2016
/*
 * Point standard output at the chosen destination.
 *	ofd < 0		make descriptor 1 a duplicate of descriptor 2
 *	ofd == 1	nothing to do
 *	otherwise	make descriptor 1 a duplicate of ofd and close
 *			ofd; if descriptor 2 is not open, it is made to
 *			refer to the same file as well
 */
void
setoutput(int ofd)
{
	int probe;

	if (ofd == 1)
		return;

	(void) close(1);

	if (ofd < 0) {
		(void) fcntl(2, F_DUPFD, 1);
		return;
	}

	(void) fcntl(ofd, F_DUPFD, 1);
	(void) close(ofd);

	/* if no stderr, make it the same file */
	probe = dup(2);
	if (probe >= 0)
		(void) close(probe);
	else
		(void) fcntl(1, F_DUPFD, 2);
}
2034
2035/*
2036 * Accumulate time differencies:  a += e - s;
2037 */
2038void
2039accumulate(timestruc_t *ap, const timestruc_t *ep, const timestruc_t *sp)
2040{
2041	ap->tv_sec += ep->tv_sec - sp->tv_sec;
2042	ap->tv_nsec += ep->tv_nsec - sp->tv_nsec;
2043	if (ap->tv_nsec >= NANOSEC) {
2044		ap->tv_nsec -= NANOSEC;
2045		ap->tv_sec++;
2046	} else if (ap->tv_nsec < 0) {
2047		ap->tv_nsec += NANOSEC;
2048		ap->tv_sec--;
2049	}
2050}
2051
2052int
2053lib_sort(const void *p1, const void *p2)
2054{
2055	int cmpr = 0;
2056	long i;
2057	long j;
2058
2059	hentry_t *t1 = (hentry_t *)p1;
2060	hentry_t *t2 = (hentry_t *)p2;
2061
2062	char *p = t1->lib;
2063	char *q = t2->lib;
2064
2065	if ((cmpr = strcmp(p, q)) == 0) {
2066		i = t1->count;
2067		j = t2->count;
2068		if (i > j)
2069			return (-1);
2070		else if (i < j)
2071			return (1);
2072		else {
2073			p = t1->key;
2074			q = t2->key;
2075			return (strcmp(p, q));
2076		}
2077	} else
2078		return (cmpr);
2079}
2080
2081void
2082report(private_t *pri, time_t lapse)	/* elapsed time, clock ticks */
2083{
2084	int i;
2085	long count;
2086	const char *name;
2087	long error;
2088	long total;
2089	long errtot;
2090	timestruc_t tickzero;
2091	timestruc_t ticks;
2092	timestruc_t ticktot;
2093
2094	if (descendent)
2095		return;
2096
2097	for (i = 0, total = 0; i <= PRMAXFAULT && !interrupt; i++) {
2098		if ((count = Cp->fltcount[i]) != 0) {
2099			if (total == 0)		/* produce header */
2100				(void) printf("faults -------------\n");
2101
2102			name = proc_fltname(i, pri->flt_name,
2103			    sizeof (pri->flt_name));
2104
2105			(void) printf("%s%s\t%4ld\n", name,
2106			    (((int)strlen(name) < 8)?
2107			    (const char *)"\t" : (const char *)""),
2108			    count);
2109			total += count;
2110		}
2111	}
2112	if (total && !interrupt)
2113		(void) printf("total:\t\t%4ld\n\n", total);
2114
2115	for (i = 0, total = 0; i <= PRMAXSIG && !interrupt; i++) {
2116		if ((count = Cp->sigcount[i]) != 0) {
2117			if (total == 0)		/* produce header */
2118				(void) printf("signals ------------\n");
2119			name = signame(pri, i);
2120			(void) printf("%s%s\t%4ld\n", name,
2121			    (((int)strlen(name) < 8)?
2122			    (const char *)"\t" : (const char *)""),
2123			    count);
2124			total += count;
2125		}
2126	}
2127	if (total && !interrupt)
2128		(void) printf("total:\t\t%4ld\n\n", total);
2129
2130	if ((Dynpat != NULL) && !interrupt) {
2131		size_t elem = elements_in_table(fcall_tbl);
2132		hiter_t *itr = iterate_hash(fcall_tbl);
2133		hentry_t *tmp = iter_next(itr);
2134		hentry_t *stbl = my_malloc(elem * sizeof (hentry_t), NULL);
2135		i = 0;
2136		while ((tmp != NULL) && (i < elem)) {
2137			stbl[i].prev = tmp->prev;
2138			stbl[i].next = tmp->next;
2139			stbl[i].lib = tmp->lib;
2140			stbl[i].key = tmp->key;
2141			stbl[i].count = tmp->count;
2142			tmp = iter_next(itr);
2143			i++;
2144		}
2145		qsort((void *)stbl, elem, sizeof (hentry_t),
2146		    lib_sort);
2147		(void) printf(
2148		    "\n%-20s %-40s %s\n", "Library:", "Function", "calls");
2149		for (i = 0; i < elem; i++) {
2150			(void) printf("%-20s %-40s %ld\n", stbl[i].lib,
2151			    stbl[i].key, stbl[i].count);
2152		}
2153		iter_free(itr);
2154		free(stbl);
2155		itr = NULL;
2156	}
2157
2158	if (!interrupt)
2159		(void) printf(
2160		"\nsyscall               seconds   calls  errors\n");
2161
2162	total = errtot = 0;
2163	tickzero.tv_sec = ticks.tv_sec = ticktot.tv_sec = 0;
2164	tickzero.tv_nsec = ticks.tv_nsec = ticktot.tv_nsec = 0;
2165	for (i = 0; i <= PRMAXSYS && !interrupt; i++) {
2166		struct syscount *scp = Cp->syscount[i];
2167		int n = nsubcodes(i);
2168		int subcode;
2169
2170		for (subcode = 0; subcode < n; subcode++, scp++) {
2171			if ((count = scp->count) != 0 || scp->error) {
2172				(void) printf("%-19.19s ",
2173				    sysname(pri, i, subcode));
2174
2175				ticks = scp->stime;
2176				accumulate(&ticktot, &ticks, &tickzero);
2177				prtim(&ticks);
2178
2179				(void) printf(" %7ld", count);
2180				if ((error = scp->error) != 0)
2181					(void) printf(" %7ld", error);
2182				(void) fputc('\n', stdout);
2183				total += count;
2184				errtot += error;
2185			}
2186		}
2187	}
2188
2189	if (!interrupt) {
2190		(void) printf(
2191		"                     --------  ------   ----\n");
2192		(void) printf("sys totals:         ");
2193		prtim(&ticktot);
2194		(void) printf(" %7ld %6ld\n", total, errtot);
2195	}
2196
2197	if (!interrupt) {
2198		(void) printf("usr time:           ");
2199		prtim(&Cp->usrtotal);
2200		(void) fputc('\n', stdout);
2201	}
2202
2203	if (!interrupt) {
2204		int hz = (int)sysconf(_SC_CLK_TCK);
2205
2206		ticks.tv_sec = lapse / hz;
2207		ticks.tv_nsec = (lapse % hz) * (1000000000 / hz);
2208		(void) printf("elapsed:            ");
2209		prtim(&ticks);
2210		(void) fputc('\n', stdout);
2211	}
2212}
2213
2214void
2215prtim(timestruc_t *tp)
2216{
2217	time_t sec;
2218
2219	if ((sec = tp->tv_sec) != 0)			/* whole seconds */
2220		(void) printf("%5lu", sec);
2221	else
2222		(void) printf("     ");
2223
2224	(void) printf(".%3.3ld", tp->tv_nsec/1000000);	/* fraction */
2225}
2226
2227/*
2228 * Gather process id's.
2229 * Return 0 on success, != 0 on failure.
2230 */
2231void
2232pids(char *arg, proc_set_t *grab)
2233{
2234	pid_t pid = -1;
2235	int i;
2236	const char *lwps = NULL;
2237
2238	if ((pid = proc_arg_xpsinfo(arg, PR_ARG_PIDS, NULL, &i, &lwps)) < 0) {
2239		(void) fprintf(stderr, "%s: cannot trace '%s': %s\n",
2240		    command, arg, Pgrab_error(i));
2241		return;
2242	}
2243
2244	for (i = 0; i < ngrab; i++)
2245		if (grab[i].pid == pid)	/* duplicate */
2246			break;
2247
2248	if (i == ngrab) {
2249		grab[ngrab].pid = pid;
2250		grab[ngrab].lwps = lwps;
2251		ngrab++;
2252	} else {
2253		(void) fprintf(stderr, "%s: duplicate process-id ignored: %d\n",
2254		    command, (int)pid);
2255	}
2256}
2257
2258/*
2259 * Report psargs string.
2260 */
2261void
2262psargs(private_t *pri)
2263{
2264	pid_t pid = Pstatus(Proc)->pr_pid;
2265	psinfo_t psinfo;
2266
2267	if (proc_get_psinfo(pid, &psinfo) == 0)
2268		(void) printf("%spsargs: %.64s\n",
2269		    pri->pname, psinfo.pr_psargs);
2270	else {
2271		perror("psargs()");
2272		(void) printf("%s\t*** Cannot read psinfo file for pid %d\n",
2273		    pri->pname, (int)pid);
2274	}
2275}
2276
2277char *
2278fetchstring(private_t *pri, long addr, int maxleng)
2279{
2280	int nbyte;
2281	int leng = 0;
2282	char string[41];
2283
2284	string[40] = '\0';
2285	if (pri->str_bsize == 0)  /* initial allocation of string buffer */
2286		pri->str_buffer =
2287		    my_malloc(pri->str_bsize = 16, "string buffer");
2288	*pri->str_buffer = '\0';
2289
2290	for (nbyte = 40; nbyte == 40 && leng < maxleng; addr += 40) {
2291		if ((nbyte = Pread(Proc, string, 40, addr)) <= 0)
2292			return (leng? pri->str_buffer : NULL);
2293		if (nbyte > 0 &&
2294		    (nbyte = strlen(string)) > 0) {
2295			while (leng + nbyte >= pri->str_bsize)
2296				pri->str_buffer =
2297				    my_realloc(pri->str_buffer,
2298				    pri->str_bsize *= 2, "string buffer");
2299			(void) strcpy(pri->str_buffer+leng, string);
2300			leng += nbyte;
2301		}
2302	}
2303
2304	if (leng > maxleng)
2305		leng = maxleng;
2306	pri->str_buffer[leng] = '\0';
2307
2308	return (pri->str_buffer);
2309}
2310
2311static priv_set_t *
2312getset(prpriv_t *p, priv_ptype_t set)
2313{
2314	return ((priv_set_t *)
2315	    &p->pr_sets[priv_getsetbyname(set) * p->pr_setsize]);
2316}
2317
2318void
2319show_cred(private_t *pri, int new, int loadonly)
2320{
2321	prcred_t cred;
2322	prpriv_t *privs;
2323
2324	if (proc_get_cred(Pstatus(Proc)->pr_pid, &cred, 0) < 0) {
2325		perror("show_cred() - credential");
2326		(void) printf("%s\t*** Cannot get credentials\n", pri->pname);
2327		return;
2328	}
2329	if ((privs = proc_get_priv(Pstatus(Proc)->pr_pid)) == NULL) {
2330		perror("show_cred() - privileges");
2331		(void) printf("%s\t*** Cannot get privileges\n", pri->pname);
2332		return;
2333	}
2334
2335	if (!loadonly && !cflag && prismember(&trace, SYS_execve)) {
2336		if (new)
2337			credentials = cred;
2338		if ((new && cred.pr_ruid != cred.pr_suid) ||
2339		    cred.pr_ruid != credentials.pr_ruid ||
2340		    cred.pr_suid != credentials.pr_suid)
2341			(void) printf(
2342		"%s    *** SUID: ruid/euid/suid = %d / %d / %d  ***\n",
2343			    pri->pname,
2344			    (int)cred.pr_ruid,
2345			    (int)cred.pr_euid,
2346			    (int)cred.pr_suid);
2347		if ((new && cred.pr_rgid != cred.pr_sgid) ||
2348		    cred.pr_rgid != credentials.pr_rgid ||
2349		    cred.pr_sgid != credentials.pr_sgid)
2350			(void) printf(
2351		"%s    *** SGID: rgid/egid/sgid = %d / %d / %d  ***\n",
2352			    pri->pname,
2353			    (int)cred.pr_rgid,
2354			    (int)cred.pr_egid,
2355			    (int)cred.pr_sgid);
2356		if (privdata != NULL && cred.pr_euid != 0) {
2357			priv_set_t *npset = getset(privs, PRIV_PERMITTED);
2358			priv_set_t *opset = getset(privdata, PRIV_PERMITTED);
2359			char *s, *t;
2360			if (!priv_issubset(npset, opset)) {
2361				/* Use the to be freed privdata as scratch */
2362				priv_inverse(opset);
2363				priv_intersect(npset, opset);
2364				s = priv_set_to_str(opset, ',', PRIV_STR_SHORT);
2365				t = priv_set_to_str(npset, ',', PRIV_STR_SHORT);
2366				(void) printf("%s    *** FPRIV: P/E: %s ***\n",
2367				    pri->pname,
2368				    strlen(s) > strlen(t) ? t : s);
2369				free(s);
2370				free(t);
2371			}
2372		}
2373	}
2374
2375	if (privdata != NULL)
2376		free(privdata);
2377	credentials = cred;
2378	privdata = privs;
2379}
2380
2381/*
2382 * Take control of a child process.
2383 * We come here with truss_lock held.
2384 */
2385int
2386control(private_t *pri, pid_t pid)
2387{
2388	const pstatus_t *Psp;
2389	const lwpstatus_t *Lsp;
2390	pid_t childpid = 0;
2391	long flags;
2392	int rc;
2393
2394	(void) mutex_lock(&gps->fork_lock);
2395	while (gps->fork_pid != 0)
2396		(void) cond_wait(&gps->fork_cv, &gps->fork_lock);
2397	gps->fork_pid = getpid();	/* parent pid */
2398	if ((childpid = fork()) == -1) {
2399		(void) printf("%s\t*** Cannot fork() to control process #%d\n",
2400		    pri->pname, (int)pid);
2401		Flush();
2402		gps->fork_pid = 0;
2403		(void) cond_broadcast(&gps->fork_cv);
2404		(void) mutex_unlock(&gps->fork_lock);
2405		release(pri, pid);
2406		return (FALSE);
2407	}
2408
2409	if (childpid != 0) {
2410		/*
2411		 * The parent carries on, after a brief pause.
2412		 * The parent must wait until the child executes procadd(pid).
2413		 */
2414		while (gps->fork_pid != childpid)
2415			(void) cond_wait(&gps->fork_cv, &gps->fork_lock);
2416		gps->fork_pid = 0;
2417		(void) cond_broadcast(&gps->fork_cv);
2418		(void) mutex_unlock(&gps->fork_lock);
2419		return (FALSE);
2420	}
2421
2422	childpid = getpid();
2423	descendent = TRUE;
2424	exit_called = FALSE;
2425	Pfree(Proc);	/* forget old process */
2426
2427	/*
2428	 * The parent process owns the shared gps->fork_lock.
2429	 * The child must grab it again.
2430	 */
2431	(void) mutex_lock(&gps->fork_lock);
2432
2433	/*
2434	 * Child grabs the process and retains the tracing flags.
2435	 */
2436	if ((Proc = Pgrab(pid, PGRAB_RETAIN, &rc)) == NULL) {
2437		(void) fprintf(stderr,
2438		    "%s: cannot control child process, pid# %d: %s\n",
2439		    command, (int)pid, Pgrab_error(rc));
2440		gps->fork_pid = childpid;
2441		(void) cond_broadcast(&gps->fork_cv);
2442		(void) mutex_unlock(&gps->fork_lock);
2443		exit(2);
2444	}
2445
2446	per_proc_init();
2447	/*
2448	 * Add ourself to the set of truss processes
2449	 * and notify the parent to carry on.
2450	 */
2451	procadd(pid, NULL);
2452	gps->fork_pid = childpid;
2453	(void) cond_broadcast(&gps->fork_cv);
2454	(void) mutex_unlock(&gps->fork_lock);
2455
2456	/*
2457	 * We may have grabbed the child before it is fully stopped on exit
2458	 * from fork.  Wait one second (at most) for it to settle down.
2459	 */
2460	(void) Pwait(Proc, MILLISEC);
2461	if (Rdb_agent != NULL)
2462		Rdb_agent = Prd_agent(Proc);
2463
2464	Psp = Pstatus(Proc);
2465	Lsp = &Psp->pr_lwp;
2466	pri->lwpstat = Lsp;
2467	data_model = Psp->pr_dmodel;
2468
2469	make_pname(pri, 0);
2470
2471	pri->syslast = Psp->pr_stime;
2472	pri->usrlast = Psp->pr_utime;
2473
2474	flags = PR_FORK | PR_ASYNC;
2475	if (Dynpat != NULL)
2476		flags |= PR_BPTADJ;	/* needed for x86 */
2477	(void) Psetflags(Proc, flags);
2478
2479	return (TRUE);
2480}
2481
2482/*
2483 * Take control of an existing process.
2484 */
2485int
2486grabit(private_t *pri, proc_set_t *set)
2487{
2488	const pstatus_t *Psp;
2489	const lwpstatus_t *Lsp;
2490	int gcode;
2491
2492	/*
2493	 * Don't force the takeover unless the -F option was specified.
2494	 */
2495	if ((Proc = Pgrab(set->pid, Fflag, &gcode)) == NULL) {
2496		(void) fprintf(stderr, "%s: %s: %d\n",
2497		    command, Pgrab_error(gcode), (int)set->pid);
2498		pri->lwpstat = NULL;
2499		return (FALSE);
2500	}
2501	Psp = Pstatus(Proc);
2502	Lsp = &Psp->pr_lwp;
2503	pri->lwpstat = Lsp;
2504
2505	make_pname(pri, 0);
2506
2507	data_model = Psp->pr_dmodel;
2508	pri->syslast = Psp->pr_stime;
2509	pri->usrlast = Psp->pr_utime;
2510
2511	if (fflag || Dynpat != NULL)
2512		(void) Psetflags(Proc, PR_FORK);
2513	else
2514		(void) Punsetflags(Proc, PR_FORK);
2515	procadd(set->pid, set->lwps);
2516	show_cred(pri, TRUE, FALSE);
2517	return (TRUE);
2518}
2519
2520/*
2521 * Release process from control.
2522 */
2523void
2524release(private_t *pri, pid_t pid)
2525{
2526	/*
2527	 * The process in question is the child of a traced process.
2528	 * We are here to turn off the inherited tracing flags.
2529	 */
2530	int fd;
2531	char ctlname[100];
2532	long ctl[2];
2533
2534	ctl[0] = PCSET;
2535	ctl[1] = PR_RLC;
2536
2537	/* process is freshly forked, no need for exclusive open */
2538	(void) sprintf(ctlname, "/proc/%d/ctl", (int)pid);
2539	if ((fd = open(ctlname, O_WRONLY)) < 0 ||
2540	    write(fd, (char *)ctl, sizeof (ctl)) < 0) {
2541		perror("release()");
2542		(void) printf(
2543		    "%s\t*** Cannot release child process, pid# %d\n",
2544		    pri->pname, (int)pid);
2545		Flush();
2546	}
2547	if (fd >= 0)	/* run-on-last-close sets the process running */
2548		(void) close(fd);
2549}
2550
2551void
2552intr(int sig)
2553{
2554	/*
2555	 * SIGUSR1 is special.  It is used by one truss process to tell
2556	 * another truss process to release its controlled process.
2557	 * SIGUSR2 is also special.  It is used to wake up threads waiting
2558	 * for a victim lwp to stop after an event that will leave the
2559	 * process hung (stopped and abandoned) has occurred.
2560	 */
2561	if (sig == SIGUSR1) {
2562		sigusr1 = TRUE;
2563	} else if (sig == SIGUSR2) {
2564		void *value;
2565		private_t *pri;
2566		struct ps_lwphandle *Lwp;
2567
2568		if (thr_getspecific(private_key, &value) == 0 &&
2569		    (pri = value) != NULL &&
2570		    (Lwp = pri->Lwp) != NULL)
2571			(void) Lstop(Lwp, MILLISEC / 10);
2572	} else {
2573		interrupt = sig;
2574	}
2575}
2576
2577void
2578errmsg(const char *s, const char *q)
2579{
2580	char msg[512];
2581
2582	if (s || q) {
2583		msg[0] = '\0';
2584		if (command) {
2585			(void) strcpy(msg, command);
2586			(void) strcat(msg, ": ");
2587		}
2588		if (s)
2589			(void) strcat(msg, s);
2590		if (q)
2591			(void) strcat(msg, q);
2592		(void) strcat(msg, "\n");
2593		(void) write(2, msg, (size_t)strlen(msg));
2594	}
2595}
2596
2597void
2598abend(const char *s, const char *q)
2599{
2600	(void) thr_sigsetmask(SIG_SETMASK, &fillset, NULL);
2601	if (Proc) {
2602		Flush();
2603		errmsg(s, q);
2604		clear_breakpoints();
2605		(void) Punsetflags(Proc, PR_ASYNC);
2606		Prelease(Proc, created? PRELEASE_KILL : PRELEASE_CLEAR);
2607		procdel();
2608		(void) wait4all();
2609	} else {
2610		errmsg(s, q);
2611	}
2612	exit(2);
2613}
2614
/*
 * Resize a memory block with realloc().
 * If that fails, abend() is called: with "cannot allocate " followed by
 * 'msg' when a description was supplied, otherwise with a generic
 * message.  The resized block is returned.
 */
void *
my_realloc(void *buf, size_t size, const char *msg)
{
	void *fresh = realloc(buf, size);

	if (fresh != NULL)
		return (fresh);

	if (msg == NULL)
		abend("memory allocation failure", NULL);
	else
		abend("cannot allocate ", msg);

	return (fresh);
}
2631
/*
 * Allocate a zeroed array of 'nelem' elements of 'elsize' bytes each
 * with calloc().  If that fails, abend() is called: with
 * "cannot allocate " followed by 'msg' when a description was supplied,
 * otherwise with a generic message.
 */
void *
my_calloc(size_t nelem, size_t elsize, const char *msg)
{
	void *fresh = calloc(nelem, elsize);

	if (fresh != NULL)
		return (fresh);

	if (msg == NULL)
		abend("memory allocation failure", NULL);
	else
		abend("cannot allocate ", msg);

	return (fresh);
}
2646
/*
 * Allocate 'size' bytes.  This is my_realloc() applied to a NULL block,
 * so allocation failure is handled there; 'msg' is passed through.
 */
void *
my_malloc(size_t size, const char *msg)
{
	void *fresh;

	fresh = my_realloc(NULL, size, msg);
	return (fresh);
}
2652
2653int
2654wait4all()
2655{
2656	int i;
2657	pid_t pid;
2658	int rc = 0;
2659	int status;
2660
2661	for (i = 0; i < 10; i++) {
2662		while ((pid = wait(&status)) != -1) {
2663			/* return exit() code of the created process */
2664			if (pid == created) {
2665				if (WIFEXITED(status))
2666					rc = WEXITSTATUS(status);
2667				else
2668					rc |= 0x80; /* +128 to indicate sig */
2669			}
2670		}
2671		if (errno != EINTR && errno != ERESTART)
2672			break;
2673	}
2674
2675	if (i >= 10)	/* repeated interrupts */
2676		rc = 2;
2677
2678	return (rc);
2679}
2680
2681void
2682letgo(private_t *pri)
2683{
2684	(void) printf("%s\t*** process otherwise traced, releasing ...\n",
2685	    pri->pname);
2686}
2687
/*
 * Test for empty set.
 * support routine used by isemptyset() macro.
 *
 * 'sp' points to the set, an array of 'n' 32-bit words.
 * Returns TRUE if every word is zero (trivially so when n is 0),
 * FALSE as soon as a nonzero word is found.
 */
int
is_empty(const uint32_t *sp, size_t n)
{
	size_t word;

	for (word = 0; word < n; word++) {
		if (sp[word] != 0)
			return (FALSE);
	}

	return (TRUE);
}
2705
/*
 * OR the second set into the first.
 * The sets must be the same size: 'n' 32-bit words each.
 * Only the first set is modified; nothing happens when n is 0.
 */
void
or_set(uint32_t *sp1, const uint32_t *sp2, size_t n)
{
	size_t word;

	for (word = 0; word < n; word++)
		sp1[word] |= sp2[word];
}
2719