1/*
2 * Copyright 1997 Sean Eric Fagan
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. All advertising materials mentioning features or use of this software
13 *    must display the following acknowledgement:
14 *	This product includes software developed by Sean Eric Fagan
15 * 4. Neither the name of the author may be used to endorse or promote
16 *    products derived from this software without specific prior written
17 *    permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
#ifndef lint
/* Revision-control identification string; left out when lint is defined. */
static const char rcsid[] = "$FreeBSD$";
#endif /* !lint */
36
/*
 * Linux/i386-specific system call handling for 32-bit Linux binaries
 * traced on amd64.  Given how much of this code is taken from the FreeBSD
 * equivalent, even more of it could probably be moved into support
 * routines shared by every personality.
 */
42
43#include <sys/types.h>
44#include <sys/ptrace.h>
45
46#include <machine/reg.h>
47#include <machine/psl.h>
48
49#include <errno.h>
50#include <fcntl.h>
51#include <signal.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <time.h>
56#include <unistd.h>
57
58#include "truss.h"
59#include "syscall.h"
60#include "extern.h"
61
62#include "linux32_syscalls.h"
63
64static int nsyscalls =
65	sizeof(linux32_syscallnames) / sizeof(linux32_syscallnames[0]);
66
/*
 * Book-keeping for one traced system call, filled in when the call is
 * entered and consumed when it returns.
 *
 * It is probably not quite sufficient: the same structure could likely be
 * shared by the various syscall personalities, and system calls probably
 * need to nest (for signal handlers).
 *
 * 'sc' describes the system call; it may be NULL, however, if we don't
 * know about this particular system call yet.
 */
struct linux_syscall {
	struct syscall *sc;	/* decoder description, or NULL if unknown */
	const char *name;	/* name from the syscall table, or NULL */
	int number;		/* syscall number as read at entry */
	unsigned long args[5];	/* raw argument values taken from registers */
	int nargs;		/* number of arguments -- *not* number of words! */
	char **s_args;		/* the printable arguments */
};
84
85static struct linux_syscall *
86alloc_fsc(void)
87{
88
89	return (malloc(sizeof(struct linux_syscall)));
90}
91
92/* Clear up and free parts of the fsc structure. */
93static void
94free_fsc(struct linux_syscall *fsc)
95{
96	int i;
97
98	if (fsc->s_args) {
99		for (i = 0; i < fsc->nargs; i++)
100			free(fsc->s_args[i]);
101		free(fsc->s_args);
102	}
103	free(fsc);
104}
105
106/*
107 * Called when a process has entered a system call.  nargs is the
108 * number of words, not number of arguments (a necessary distinction
109 * in some cases).  Note that if the STOPEVENT() code in i386/i386/trap.c
110 * is ever changed these functions need to keep up.
111 */
112
113void
114amd64_linux32_syscall_entry(struct trussinfo *trussinfo, int nargs)
115{
116	struct reg regs;
117	struct linux_syscall *fsc;
118	struct syscall *sc;
119	lwpid_t tid;
120	int i, syscall_num;
121
122	tid = trussinfo->curthread->tid;
123
124	if (ptrace(PT_GETREGS, tid, (caddr_t)&regs, 0) < 0) {
125		fprintf(trussinfo->outfile, "-- CANNOT READ REGISTERS --\n");
126		return;
127	}
128
129	syscall_num = regs.r_rax;
130
131	fsc = alloc_fsc();
132	if (fsc == NULL)
133		return;
134	fsc->number = syscall_num;
135	fsc->name = (syscall_num < 0 || syscall_num >= nsyscalls) ?
136	    NULL : linux32_syscallnames[syscall_num];
137	if (!fsc->name) {
138		fprintf(trussinfo->outfile, "-- UNKNOWN SYSCALL %d --\n",
139		    syscall_num);
140	}
141
142	if (fsc->name && (trussinfo->flags & FOLLOWFORKS) &&
143	    (strcmp(fsc->name, "linux_fork") == 0 ||
144	    strcmp(fsc->name, "linux_vfork") == 0))
145		trussinfo->curthread->in_fork = 1;
146
147	if (nargs == 0)
148		return;
149
150	/*
151	 * Linux passes syscall arguments in registers, not
152	 * on the stack.  Fortunately, we've got access to the
153	 * register set.  Note that we don't bother checking the
154	 * number of arguments.	And what does linux do for syscalls
155	 * that have more than five arguments?
156	 */
157
158	fsc->args[0] = regs.r_rbx;
159	fsc->args[1] = regs.r_rcx;
160	fsc->args[2] = regs.r_rdx;
161	fsc->args[3] = regs.r_rsi;
162	fsc->args[4] = regs.r_rdi;
163
164	sc = get_syscall(fsc->name);
165	if (sc)
166		fsc->nargs = sc->nargs;
167	else {
168#if DEBUG
169		fprintf(trussinfo->outfile, "unknown syscall %s -- setting "
170		    "args to %d\n", fsc->name, nargs);
171#endif
172		fsc->nargs = nargs;
173	}
174
175	fsc->s_args = calloc(1, (1 + fsc->nargs) * sizeof(char *));
176	fsc->sc = sc;
177
178	/*
179	 * At this point, we set up the system call arguments.
180	 * We ignore any OUT ones, however -- those are arguments that
181	 * are set by the system call, and so are probably meaningless
182	 * now.	This doesn't currently support arguments that are
183	 * passed in *and* out, however.
184	 */
185
186	if (fsc->name) {
187#if DEBUG
188		fprintf(stderr, "syscall %s(", fsc->name);
189#endif
190		for (i = 0; i < fsc->nargs; i++) {
191#if DEBUG
192			fprintf(stderr, "0x%x%s", sc ?
193			    fsc->args[sc->args[i].offset] : fsc->args[i],
194			    i < (fsc->nargs - 1) ? "," : "");
195#endif
196			if (sc && !(sc->args[i].type & OUT)) {
197				fsc->s_args[i] = print_arg(&sc->args[i],
198				    fsc->args, 0, trussinfo);
199			}
200		}
201#if DEBUG
202		fprintf(stderr, ")\n");
203#endif
204	}
205
206#if DEBUG
207	fprintf(trussinfo->outfile, "\n");
208#endif
209
210	if (fsc->name != NULL && (strcmp(fsc->name, "linux_execve") == 0 ||
211	    strcmp(fsc->name, "exit") == 0)) {
212		/*
213		 * XXX
214		 * This could be done in a more general
215		 * manner but it still wouldn't be very pretty.
216		 */
217		if (strcmp(fsc->name, "linux_execve") == 0) {
218			if ((trussinfo->flags & EXECVEARGS) == 0) {
219				if (fsc->s_args[1]) {
220					free(fsc->s_args[1]);
221					fsc->s_args[1] = NULL;
222				}
223			}
224			if ((trussinfo->flags & EXECVEENVS) == 0) {
225				if (fsc->s_args[2]) {
226					free(fsc->s_args[2]);
227					fsc->s_args[2] = NULL;
228				}
229			}
230		}
231	}
232	trussinfo->curthread->fsc = fsc;
233}
234
/*
 * Linux syscalls return negative errno values whereas we report positive
 * ones.  Entry N of this table is the (negative) Linux value that
 * corresponds to errno N on our side; the exit handler searches it for the
 * raw return value and reports the index of the match.
 */
const int bsd_to_linux_errno[] = {
	/*  0 -  9 */
	-0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
	/* 10 - 19 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	/* 20 - 29 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	/* 30 - 39 */
	-30, -31, -32, -33, -34, -11, -115, -114, -88, -89,
	/* 40 - 49 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	/* 50 - 59 */
	-100, -101, -102, -103, -104, -105, -106, -107, -108, -109,
	/* 60 - 69 */
	-110, -111, -40, -36, -112, -113, -39, -11, -87, -122,
	/* 70 - 79 */
	-116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
	/* 80 */
	-6,
};
249
250long
251amd64_linux32_syscall_exit(struct trussinfo *trussinfo,
252    int syscall_num __unused)
253{
254	struct reg regs;
255	struct linux_syscall *fsc;
256	struct syscall *sc;
257	lwpid_t tid;
258	long retval;
259	int errorp, i;
260
261	if (trussinfo->curthread->fsc == NULL)
262		return (-1);
263
264	tid = trussinfo->curthread->tid;
265
266	if (ptrace(PT_GETREGS, tid, (caddr_t)&regs, 0) < 0) {
267		fprintf(trussinfo->outfile, "-- CANNOT READ REGISTERS --\n");
268		return (-1);
269	}
270
271	retval = regs.r_rax;
272	errorp = !!(regs.r_rflags & PSL_C);
273
274	/*
275	 * This code, while simpler than the initial versions I used, could
276	 * stand some significant cleaning.
277	 */
278
279	fsc = trussinfo->curthread->fsc;
280	sc = fsc->sc;
281	if (!sc) {
282		for (i = 0; i < fsc->nargs; i++)
283			asprintf(&fsc->s_args[i], "0x%lx", fsc->args[i]);
284	} else {
285		/*
286		 * Here, we only look for arguments that have OUT masked in --
287		 * otherwise, they were handled in the syscall_entry function.
288		 */
289		for (i = 0; i < sc->nargs; i++) {
290			char *temp;
291			if (sc->args[i].type & OUT) {
292				/*
293				 * If an error occurred, then don't bother
294				 * getting the data; it may not be valid.
295				 */
296				if (errorp) {
297					asprintf(&temp, "0x%lx",
298					    fsc->args[sc->args[i].offset]);
299				} else {
300					temp = print_arg(&sc->args[i],
301					    fsc->args, retval, trussinfo);
302				}
303				fsc->s_args[i] = temp;
304			}
305		}
306	}
307
308	/*
309	 * It would probably be a good idea to merge the error handling,
310	 * but that complicates things considerably.
311	 */
312	if (errorp) {
313		for (i = 0;
314		    (size_t)i < sizeof(bsd_to_linux_errno) / sizeof(int); i++) {
315			if (retval == bsd_to_linux_errno[i])
316				break;
317		}
318	}
319
320	if (fsc->name != NULL && (strcmp(fsc->name, "linux_execve") == 0 ||
321	    strcmp(fsc->name, "exit") == 0))
322		trussinfo->curthread->in_syscall = 1;
323
324	print_syscall_ret(trussinfo, fsc->name, fsc->nargs, fsc->s_args, errorp,
325	    errorp ? i : retval, fsc->sc);
326	free_fsc(fsc);
327
328	return (retval);
329}
330