linux_sysvec.c revision 218658
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 218658 2011-02-13 19:07:48Z dchagin $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysent.h>
48#include <sys/sysproto.h>
49#include <sys/vnode.h>
50#include <sys/eventhandler.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_object.h>
57#include <vm/vm_page.h>
58#include <vm/vm_param.h>
59
60#include <machine/cpu.h>
61#include <machine/cputypes.h>
62#include <machine/md_var.h>
63#include <machine/pcb.h>
64
65#include <i386/linux/linux.h>
66#include <i386/linux/linux_proto.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_mib.h>
70#include <compat/linux/linux_misc.h>
71#include <compat/linux/linux_signal.h>
72#include <compat/linux/linux_util.h>
73
74MODULE_VERSION(linux, 1);
75
76MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77
78#if BYTE_ORDER == LITTLE_ENDIAN
79#define SHELLMAGIC      0x2123 /* #! */
80#else
81#define SHELLMAGIC      0x2321
82#endif
83
84/*
85 * Allow the sendsig functions to use the ldebug() facility
86 * even though they are not syscalls themselves. Map them
87 * to syscall 0. This is slightly less bogus than using
88 * ldebug(sigreturn).
89 */
90#define	LINUX_SYS_linux_rt_sendsig	0
91#define	LINUX_SYS_linux_sendsig		0
92
93extern char linux_sigcode[];
94extern int linux_szsigcode;
95
96extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97
98SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100
101static int	linux_fixup(register_t **stack_base,
102		    struct image_params *iparams);
103static int	elf_linux_fixup(register_t **stack_base,
104		    struct image_params *iparams);
105static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
106static void	exec_linux_setregs(struct thread *td,
107		    struct image_params *imgp, u_long stack);
108static register_t *linux_copyout_strings(struct image_params *imgp);
109static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
110
111static int linux_szplatform;
112const char *linux_platform;
113
114static eventhandler_tag linux_exit_tag;
115static eventhandler_tag linux_schedtail_tag;
116static eventhandler_tag linux_exec_tag;
117
118/*
119 * Linux syscalls return negative errno's, we do positive and map them
120 * Reference:
121 *   FreeBSD: src/sys/sys/errno.h
122 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
123 *            linux-2.6.17.8/include/asm-generic/errno.h
124 */
125static int bsd_to_linux_errno[ELAST + 1] = {
126	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
127	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
128	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
129	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
130	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
131	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
132	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
133	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
134	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
135	 -72, -67, -71
136};
137
138int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
139	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
140	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
141	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
142	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
143	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
144	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
145	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
146	0, LINUX_SIGUSR1, LINUX_SIGUSR2
147};
148
149int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
150	SIGHUP, SIGINT, SIGQUIT, SIGILL,
151	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
152	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
153	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
154	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
155	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
156	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
157	SIGIO, SIGURG, SIGSYS
158};
159
/* Value reported when a FreeBSD trap number has no Linux counterpart. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap number -> Linux trap number.  The slot index is the
 * FreeBSD trap number named in the trailing comment.  The table is only
 * ever read (through bsd_to_linux_trapcode() below), so it is const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap number.  Anything outside the table yields
 * LINUX_T_UNKNOWN; a negative argument is converted to a large unsigned
 * value by the comparison against sizeof and so is rejected as well.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
198
199/*
200 * If FreeBSD & Linux have a difference of opinion about what a trap
201 * means, deal with it here.
202 *
203 * MPSAFE
204 */
205static int
206translate_traps(int signal, int trap_code)
207{
208	if (signal != SIGBUS)
209		return signal;
210	switch (trap_code) {
211	case T_PROTFLT:
212	case T_TSSFLT:
213	case T_DOUBLEFLT:
214	case T_PAGEFLT:
215		return SIGSEGV;
216	default:
217		return signal;
218	}
219}
220
221static int
222linux_fixup(register_t **stack_base, struct image_params *imgp)
223{
224	register_t *argv, *envp;
225
226	argv = *stack_base;
227	envp = *stack_base + (imgp->args->argc + 1);
228	(*stack_base)--;
229	**stack_base = (intptr_t)(void *)envp;
230	(*stack_base)--;
231	**stack_base = (intptr_t)(void *)argv;
232	(*stack_base)--;
233	**stack_base = imgp->args->argc;
234	return (0);
235}
236
237static int
238elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
239{
240	struct proc *p;
241	Elf32_Auxargs *args;
242	Elf32_Addr *uplatform;
243	struct ps_strings *arginfo;
244	register_t *pos;
245
246	KASSERT(curthread->td_proc == imgp->proc,
247	    ("unsafe elf_linux_fixup(), should be curproc"));
248
249	p = imgp->proc;
250	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
251	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
252	    linux_szplatform);
253	args = (Elf32_Auxargs *)imgp->auxargs;
254	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
255
256	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
257
258	/*
259	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
260	 * as it has appeared in the 2.4.0-rc7 first time.
261	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
262	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
263	 * is not present.
264	 * Also see linux_times() implementation.
265	 */
266	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
267		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
268	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
269	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
270	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
271	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
272	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
273	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
274	AUXARGS_ENTRY(pos, AT_BASE, args->base);
275	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
276	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
277	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
278	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
279	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
280	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
281	if (args->execfd != -1)
282		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
283	AUXARGS_ENTRY(pos, AT_NULL, 0);
284
285	free(imgp->auxargs, M_TEMP);
286	imgp->auxargs = NULL;
287
288	(*stack_base)--;
289	**stack_base = (register_t)imgp->args->argc;
290	return (0);
291}
292
293/*
294 * Copied from kern/kern_exec.c
295 */
296static register_t *
297linux_copyout_strings(struct image_params *imgp)
298{
299	int argc, envc;
300	char **vectp;
301	char *stringp, *destp;
302	register_t *stack_base;
303	struct ps_strings *arginfo;
304	struct proc *p;
305
306	/*
307	 * Calculate string base and vector table pointers.
308	 * Also deal with signal trampoline code for this exec type.
309	 */
310	p = imgp->proc;
311	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
312	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
313	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
314	    sizeof(char *));
315
316	/*
317	 * install sigcode
318	 */
319	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
320	    linux_szsigcode), linux_szsigcode);
321
322	/*
323	 * install LINUX_PLATFORM
324	 */
325	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
326	    linux_szplatform), linux_szplatform);
327
328	/*
329	 * If we have a valid auxargs ptr, prepare some room
330	 * on the stack.
331	 */
332	if (imgp->auxargs) {
333		/*
334		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
335		 * lower compatibility.
336		 */
337		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
338		    (LINUX_AT_COUNT * 2);
339		/*
340		 * The '+ 2' is for the null pointers at the end of each of
341		 * the arg and env vector sets,and imgp->auxarg_size is room
342		 * for argument of Runtime loader.
343		 */
344		vectp = (char **)(destp - (imgp->args->argc +
345		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
346	} else {
347		/*
348		 * The '+ 2' is for the null pointers at the end of each of
349		 * the arg and env vector sets
350		 */
351		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
352		    sizeof(char *));
353	}
354
355	/*
356	 * vectp also becomes our initial stack base
357	 */
358	stack_base = (register_t *)vectp;
359
360	stringp = imgp->args->begin_argv;
361	argc = imgp->args->argc;
362	envc = imgp->args->envc;
363
364	/*
365	 * Copy out strings - arguments and environment.
366	 */
367	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
368
369	/*
370	 * Fill in "ps_strings" struct for ps, w, etc.
371	 */
372	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
373	suword(&arginfo->ps_nargvstr, argc);
374
375	/*
376	 * Fill in argument portion of vector table.
377	 */
378	for (; argc > 0; --argc) {
379		suword(vectp++, (long)(intptr_t)destp);
380		while (*stringp++ != 0)
381			destp++;
382		destp++;
383	}
384
385	/* a null vector table pointer separates the argp's from the envp's */
386	suword(vectp++, 0);
387
388	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
389	suword(&arginfo->ps_nenvstr, envc);
390
391	/*
392	 * Fill in environment portion of vector table.
393	 */
394	for (; envc > 0; --envc) {
395		suword(vectp++, (long)(intptr_t)destp);
396		while (*stringp++ != 0)
397			destp++;
398		destp++;
399	}
400
401	/* end of vector table is a null pointer */
402	suword(vectp, 0);
403
404	return (stack_base);
405}
406
407
408
409extern int _ucodesel, _udatasel;
410extern unsigned long linux_sznonrtsigcode;
411
412static void
413linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
414{
415	struct thread *td = curthread;
416	struct proc *p = td->td_proc;
417	struct sigacts *psp;
418	struct trapframe *regs;
419	struct l_rt_sigframe *fp, frame;
420	int sig, code;
421	int oonstack;
422
423	sig = ksi->ksi_signo;
424	code = ksi->ksi_code;
425	PROC_LOCK_ASSERT(p, MA_OWNED);
426	psp = p->p_sigacts;
427	mtx_assert(&psp->ps_mtx, MA_OWNED);
428	regs = td->td_frame;
429	oonstack = sigonstack(regs->tf_esp);
430
431#ifdef DEBUG
432	if (ldebug(rt_sendsig))
433		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
434		    catcher, sig, (void*)mask, code);
435#endif
436	/*
437	 * Allocate space for the signal handler context.
438	 */
439	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
440	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
441		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
442		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
443	} else
444		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
445	mtx_unlock(&psp->ps_mtx);
446
447	/*
448	 * Build the argument list for the signal handler.
449	 */
450	if (p->p_sysent->sv_sigtbl)
451		if (sig <= p->p_sysent->sv_sigsize)
452			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
453
454	bzero(&frame, sizeof(frame));
455
456	frame.sf_handler = catcher;
457	frame.sf_sig = sig;
458	frame.sf_siginfo = &fp->sf_si;
459	frame.sf_ucontext = &fp->sf_sc;
460
461	/* Fill in POSIX parts */
462	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
463
464	/*
465	 * Build the signal context to be used by sigreturn.
466	 */
467	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
468	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
469
470	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
471	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
472	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
473	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
474	PROC_UNLOCK(p);
475
476	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
477
478	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
479	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
480	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
481	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
482	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
483	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
484	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
485	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
486	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
487	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
488	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
489	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
490	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
491	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
492	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
493	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
494	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
495	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
496	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
497	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
498
499#ifdef DEBUG
500	if (ldebug(rt_sendsig))
501		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
502		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
503		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
504#endif
505
506	if (copyout(&frame, fp, sizeof(frame)) != 0) {
507		/*
508		 * Process has trashed its stack; give it an illegal
509		 * instruction to halt it in its tracks.
510		 */
511#ifdef DEBUG
512		if (ldebug(rt_sendsig))
513			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
514			    fp, oonstack);
515#endif
516		PROC_LOCK(p);
517		sigexit(td, SIGILL);
518	}
519
520	/*
521	 * Build context to run handler in.
522	 */
523	regs->tf_esp = (int)fp;
524	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
525	    linux_sznonrtsigcode;
526	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
527	regs->tf_cs = _ucodesel;
528	regs->tf_ds = _udatasel;
529	regs->tf_es = _udatasel;
530	regs->tf_fs = _udatasel;
531	regs->tf_ss = _udatasel;
532	PROC_LOCK(p);
533	mtx_lock(&psp->ps_mtx);
534}
535
536
537/*
538 * Send an interrupt to process.
539 *
540 * Stack is set up to allow sigcode stored
541 * in u. to call routine, followed by kcall
542 * to sigreturn routine below.  After sigreturn
543 * resets the signal mask, the stack, and the
544 * frame pointer, it returns to the user
545 * specified pc, psl.
546 */
547static void
548linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
549{
550	struct thread *td = curthread;
551	struct proc *p = td->td_proc;
552	struct sigacts *psp;
553	struct trapframe *regs;
554	struct l_sigframe *fp, frame;
555	l_sigset_t lmask;
556	int sig, code;
557	int oonstack, i;
558
559	PROC_LOCK_ASSERT(p, MA_OWNED);
560	psp = p->p_sigacts;
561	sig = ksi->ksi_signo;
562	code = ksi->ksi_code;
563	mtx_assert(&psp->ps_mtx, MA_OWNED);
564	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
565		/* Signal handler installed with SA_SIGINFO. */
566		linux_rt_sendsig(catcher, ksi, mask);
567		return;
568	}
569	regs = td->td_frame;
570	oonstack = sigonstack(regs->tf_esp);
571
572#ifdef DEBUG
573	if (ldebug(sendsig))
574		printf(ARGS(sendsig, "%p, %d, %p, %u"),
575		    catcher, sig, (void*)mask, code);
576#endif
577
578	/*
579	 * Allocate space for the signal handler context.
580	 */
581	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
582	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
583		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
584		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
585	} else
586		fp = (struct l_sigframe *)regs->tf_esp - 1;
587	mtx_unlock(&psp->ps_mtx);
588	PROC_UNLOCK(p);
589
590	/*
591	 * Build the argument list for the signal handler.
592	 */
593	if (p->p_sysent->sv_sigtbl)
594		if (sig <= p->p_sysent->sv_sigsize)
595			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
596
597	bzero(&frame, sizeof(frame));
598
599	frame.sf_handler = catcher;
600	frame.sf_sig = sig;
601
602	bsd_to_linux_sigset(mask, &lmask);
603
604	/*
605	 * Build the signal context to be used by sigreturn.
606	 */
607	frame.sf_sc.sc_mask   = lmask.__bits[0];
608	frame.sf_sc.sc_gs     = rgs();
609	frame.sf_sc.sc_fs     = regs->tf_fs;
610	frame.sf_sc.sc_es     = regs->tf_es;
611	frame.sf_sc.sc_ds     = regs->tf_ds;
612	frame.sf_sc.sc_edi    = regs->tf_edi;
613	frame.sf_sc.sc_esi    = regs->tf_esi;
614	frame.sf_sc.sc_ebp    = regs->tf_ebp;
615	frame.sf_sc.sc_ebx    = regs->tf_ebx;
616	frame.sf_sc.sc_edx    = regs->tf_edx;
617	frame.sf_sc.sc_ecx    = regs->tf_ecx;
618	frame.sf_sc.sc_eax    = regs->tf_eax;
619	frame.sf_sc.sc_eip    = regs->tf_eip;
620	frame.sf_sc.sc_cs     = regs->tf_cs;
621	frame.sf_sc.sc_eflags = regs->tf_eflags;
622	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
623	frame.sf_sc.sc_ss     = regs->tf_ss;
624	frame.sf_sc.sc_err    = regs->tf_err;
625	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
626	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
627
628	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
629		frame.sf_extramask[i] = lmask.__bits[i+1];
630
631	if (copyout(&frame, fp, sizeof(frame)) != 0) {
632		/*
633		 * Process has trashed its stack; give it an illegal
634		 * instruction to halt it in its tracks.
635		 */
636		PROC_LOCK(p);
637		sigexit(td, SIGILL);
638	}
639
640	/*
641	 * Build context to run handler in.
642	 */
643	regs->tf_esp = (int)fp;
644	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
645	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
646	regs->tf_cs = _ucodesel;
647	regs->tf_ds = _udatasel;
648	regs->tf_es = _udatasel;
649	regs->tf_fs = _udatasel;
650	regs->tf_ss = _udatasel;
651	PROC_LOCK(p);
652	mtx_lock(&psp->ps_mtx);
653}
654
655/*
656 * System call to cleanup state after a signal
657 * has been taken.  Reset signal mask and
658 * stack state from context left by sendsig (above).
659 * Return to previous pc and psl as specified by
660 * context left by sendsig. Check carefully to
661 * make sure that the user has not modified the
662 * psl to gain improper privileges or to cause
663 * a machine fault.
664 */
665int
666linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
667{
668	struct l_sigframe frame;
669	struct trapframe *regs;
670	l_sigset_t lmask;
671	sigset_t bmask;
672	int eflags, i;
673	ksiginfo_t ksi;
674
675	regs = td->td_frame;
676
677#ifdef DEBUG
678	if (ldebug(sigreturn))
679		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
680#endif
681	/*
682	 * The trampoline code hands us the sigframe.
683	 * It is unsafe to keep track of it ourselves, in the event that a
684	 * program jumps out of a signal handler.
685	 */
686	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
687		return (EFAULT);
688
689	/*
690	 * Check for security violations.
691	 */
692#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
693	eflags = frame.sf_sc.sc_eflags;
694	/*
695	 * XXX do allow users to change the privileged flag PSL_RF.  The
696	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
697	 * sometimes set it there too.  tf_eflags is kept in the signal
698	 * context during signal handling and there is no other place
699	 * to remember it, so the PSL_RF bit may be corrupted by the
700	 * signal handler without us knowing.  Corruption of the PSL_RF
701	 * bit at worst causes one more or one less debugger trap, so
702	 * allowing it is fairly harmless.
703	 */
704	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
705		return(EINVAL);
706
707	/*
708	 * Don't allow users to load a valid privileged %cs.  Let the
709	 * hardware check for invalid selectors, excess privilege in
710	 * other selectors, invalid %eip's and invalid %esp's.
711	 */
712#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
713	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
714		ksiginfo_init_trap(&ksi);
715		ksi.ksi_signo = SIGBUS;
716		ksi.ksi_code = BUS_OBJERR;
717		ksi.ksi_trapno = T_PROTFLT;
718		ksi.ksi_addr = (void *)regs->tf_eip;
719		trapsignal(td, &ksi);
720		return(EINVAL);
721	}
722
723	lmask.__bits[0] = frame.sf_sc.sc_mask;
724	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
725		lmask.__bits[i+1] = frame.sf_extramask[i];
726	linux_to_bsd_sigset(&lmask, &bmask);
727	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
728
729	/*
730	 * Restore signal context.
731	 */
732	/* %gs was restored by the trampoline. */
733	regs->tf_fs     = frame.sf_sc.sc_fs;
734	regs->tf_es     = frame.sf_sc.sc_es;
735	regs->tf_ds     = frame.sf_sc.sc_ds;
736	regs->tf_edi    = frame.sf_sc.sc_edi;
737	regs->tf_esi    = frame.sf_sc.sc_esi;
738	regs->tf_ebp    = frame.sf_sc.sc_ebp;
739	regs->tf_ebx    = frame.sf_sc.sc_ebx;
740	regs->tf_edx    = frame.sf_sc.sc_edx;
741	regs->tf_ecx    = frame.sf_sc.sc_ecx;
742	regs->tf_eax    = frame.sf_sc.sc_eax;
743	regs->tf_eip    = frame.sf_sc.sc_eip;
744	regs->tf_cs     = frame.sf_sc.sc_cs;
745	regs->tf_eflags = eflags;
746	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
747	regs->tf_ss     = frame.sf_sc.sc_ss;
748
749	return (EJUSTRETURN);
750}
751
752/*
753 * System call to cleanup state after a signal
754 * has been taken.  Reset signal mask and
755 * stack state from context left by rt_sendsig (above).
756 * Return to previous pc and psl as specified by
757 * context left by sendsig. Check carefully to
758 * make sure that the user has not modified the
759 * psl to gain improper privileges or to cause
760 * a machine fault.
761 */
762int
763linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
764{
765	struct l_ucontext uc;
766	struct l_sigcontext *context;
767	sigset_t bmask;
768	l_stack_t *lss;
769	stack_t ss;
770	struct trapframe *regs;
771	int eflags;
772	ksiginfo_t ksi;
773
774	regs = td->td_frame;
775
776#ifdef DEBUG
777	if (ldebug(rt_sigreturn))
778		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
779#endif
780	/*
781	 * The trampoline code hands us the ucontext.
782	 * It is unsafe to keep track of it ourselves, in the event that a
783	 * program jumps out of a signal handler.
784	 */
785	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
786		return (EFAULT);
787
788	context = &uc.uc_mcontext;
789
790	/*
791	 * Check for security violations.
792	 */
793#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
794	eflags = context->sc_eflags;
795	/*
796	 * XXX do allow users to change the privileged flag PSL_RF.  The
797	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
798	 * sometimes set it there too.  tf_eflags is kept in the signal
799	 * context during signal handling and there is no other place
800	 * to remember it, so the PSL_RF bit may be corrupted by the
801	 * signal handler without us knowing.  Corruption of the PSL_RF
802	 * bit at worst causes one more or one less debugger trap, so
803	 * allowing it is fairly harmless.
804	 */
805	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
806		return(EINVAL);
807
808	/*
809	 * Don't allow users to load a valid privileged %cs.  Let the
810	 * hardware check for invalid selectors, excess privilege in
811	 * other selectors, invalid %eip's and invalid %esp's.
812	 */
813#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
814	if (!CS_SECURE(context->sc_cs)) {
815		ksiginfo_init_trap(&ksi);
816		ksi.ksi_signo = SIGBUS;
817		ksi.ksi_code = BUS_OBJERR;
818		ksi.ksi_trapno = T_PROTFLT;
819		ksi.ksi_addr = (void *)regs->tf_eip;
820		trapsignal(td, &ksi);
821		return(EINVAL);
822	}
823
824	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
825	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
826
827	/*
828	 * Restore signal context
829	 */
830	/* %gs was restored by the trampoline. */
831	regs->tf_fs     = context->sc_fs;
832	regs->tf_es     = context->sc_es;
833	regs->tf_ds     = context->sc_ds;
834	regs->tf_edi    = context->sc_edi;
835	regs->tf_esi    = context->sc_esi;
836	regs->tf_ebp    = context->sc_ebp;
837	regs->tf_ebx    = context->sc_ebx;
838	regs->tf_edx    = context->sc_edx;
839	regs->tf_ecx    = context->sc_ecx;
840	regs->tf_eax    = context->sc_eax;
841	regs->tf_eip    = context->sc_eip;
842	regs->tf_cs     = context->sc_cs;
843	regs->tf_eflags = eflags;
844	regs->tf_esp    = context->sc_esp_at_signal;
845	regs->tf_ss     = context->sc_ss;
846
847	/*
848	 * call sigaltstack & ignore results..
849	 */
850	lss = &uc.uc_stack;
851	ss.ss_sp = lss->ss_sp;
852	ss.ss_size = lss->ss_size;
853	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
854
855#ifdef DEBUG
856	if (ldebug(rt_sigreturn))
857		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
858		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
859#endif
860	(void)kern_sigaltstack(td, &ss, NULL);
861
862	return (EJUSTRETURN);
863}
864
865static int
866linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
867{
868	struct proc *p;
869	struct trapframe *frame;
870
871	p = td->td_proc;
872	frame = td->td_frame;
873
874	sa->code = frame->tf_eax;
875	sa->args[0] = frame->tf_ebx;
876	sa->args[1] = frame->tf_ecx;
877	sa->args[2] = frame->tf_edx;
878	sa->args[3] = frame->tf_esi;
879	sa->args[4] = frame->tf_edi;
880	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
881
882	if (sa->code >= p->p_sysent->sv_size)
883		sa->callp = &p->p_sysent->sv_table[0];
884 	else
885 		sa->callp = &p->p_sysent->sv_table[sa->code];
886	sa->narg = sa->callp->sy_narg;
887
888	td->td_retval[0] = 0;
889	td->td_retval[1] = frame->tf_edx;
890
891	return (0);
892}
893
894/*
895 * If a linux binary is exec'ing something, try this image activator
896 * first.  We override standard shell script execution in order to
897 * be able to modify the interpreter path.  We only do this if a linux
898 * binary is doing the exec, so we do not create an EXEC module for it.
899 */
900static int	exec_linux_imgact_try(struct image_params *iparams);
901
902static int
903exec_linux_imgact_try(struct image_params *imgp)
904{
905    const char *head = (const char *)imgp->image_header;
906    char *rpath;
907    int error = -1;
908
909    /*
910     * The interpreter for shell scripts run from a linux binary needs
911     * to be located in /compat/linux if possible in order to recursively
912     * maintain linux path emulation.
913     */
914    if (((const short *)head)[0] == SHELLMAGIC) {
915	    /*
916	     * Run our normal shell image activator.  If it succeeds attempt
917	     * to use the alternate path for the interpreter.  If an alternate
918	     * path is found, use our stringspace to store it.
919	     */
920	    if ((error = exec_shell_imgact(imgp)) == 0) {
921		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
922			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
923		    if (rpath != NULL)
924			    imgp->args->fname_buf =
925				imgp->interpreter_name = rpath;
926	    }
927    }
928    return (error);
929}
930
931/*
932 * exec_setregs may initialize some registers differently than Linux
933 * does, thus potentially confusing Linux binaries. If necessary, we
934 * override the exec_setregs default(s) here.
935 */
936static void
937exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
938{
939	struct pcb *pcb = td->td_pcb;
940
941	exec_setregs(td, imgp, stack);
942
943	/* Linux sets %gs to 0, we default to _udatasel */
944	pcb->pcb_gs = 0;
945	load_gs(0);
946
947	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
948}
949
950static void
951linux_get_machine(const char **dst)
952{
953
954	switch (cpu_class) {
955	case CPUCLASS_686:
956		*dst = "i686";
957		break;
958	case CPUCLASS_586:
959		*dst = "i586";
960		break;
961	case CPUCLASS_486:
962		*dst = "i486";
963		break;
964	default:
965		*dst = "i386";
966	}
967}
968
969struct sysentvec linux_sysvec = {
970	.sv_size	= LINUX_SYS_MAXSYSCALL,
971	.sv_table	= linux_sysent,
972	.sv_mask	= 0,
973	.sv_sigsize	= LINUX_SIGTBLSZ,
974	.sv_sigtbl	= bsd_to_linux_signal,
975	.sv_errsize	= ELAST + 1,
976	.sv_errtbl	= bsd_to_linux_errno,
977	.sv_transtrap	= translate_traps,
978	.sv_fixup	= linux_fixup,
979	.sv_sendsig	= linux_sendsig,
980	.sv_sigcode	= linux_sigcode,
981	.sv_szsigcode	= &linux_szsigcode,
982	.sv_prepsyscall	= NULL,
983	.sv_name	= "Linux a.out",
984	.sv_coredump	= NULL,
985	.sv_imgact_try	= exec_linux_imgact_try,
986	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
987	.sv_pagesize	= PAGE_SIZE,
988	.sv_minuser	= VM_MIN_ADDRESS,
989	.sv_maxuser	= VM_MAXUSER_ADDRESS,
990	.sv_usrstack	= USRSTACK,
991	.sv_psstrings	= PS_STRINGS,
992	.sv_stackprot	= VM_PROT_ALL,
993	.sv_copyout_strings = exec_copyout_strings,
994	.sv_setregs	= exec_linux_setregs,
995	.sv_fixlimit	= NULL,
996	.sv_maxssiz	= NULL,
997	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
998	.sv_set_syscall_retval = cpu_set_syscall_retval,
999	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1000	.sv_syscallnames = NULL,
1001};
1002
1003struct sysentvec elf_linux_sysvec = {
1004	.sv_size	= LINUX_SYS_MAXSYSCALL,
1005	.sv_table	= linux_sysent,
1006	.sv_mask	= 0,
1007	.sv_sigsize	= LINUX_SIGTBLSZ,
1008	.sv_sigtbl	= bsd_to_linux_signal,
1009	.sv_errsize	= ELAST + 1,
1010	.sv_errtbl	= bsd_to_linux_errno,
1011	.sv_transtrap	= translate_traps,
1012	.sv_fixup	= elf_linux_fixup,
1013	.sv_sendsig	= linux_sendsig,
1014	.sv_sigcode	= linux_sigcode,
1015	.sv_szsigcode	= &linux_szsigcode,
1016	.sv_prepsyscall	= NULL,
1017	.sv_name	= "Linux ELF",
1018	.sv_coredump	= elf32_coredump,
1019	.sv_imgact_try	= exec_linux_imgact_try,
1020	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1021	.sv_pagesize	= PAGE_SIZE,
1022	.sv_minuser	= VM_MIN_ADDRESS,
1023	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1024	.sv_usrstack	= USRSTACK,
1025	.sv_psstrings	= PS_STRINGS,
1026	.sv_stackprot	= VM_PROT_ALL,
1027	.sv_copyout_strings = linux_copyout_strings,
1028	.sv_setregs	= exec_linux_setregs,
1029	.sv_fixlimit	= NULL,
1030	.sv_maxssiz	= NULL,
1031	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32,
1032	.sv_set_syscall_retval = cpu_set_syscall_retval,
1033	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1034	.sv_syscallnames = NULL,
1035};
1036
/*
 * Constants used to recognise the ABI note of a Linux binary:
 * GNU_ABI_VENDOR is the vendor name stored in linux_brandnote, and
 * GNULINUX_ABI_DESC is the value linux_trans_osrel() requires in the
 * first word of the note descriptor.
 */
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;
1039
1040static boolean_t
1041linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1042{
1043	const Elf32_Word *desc;
1044	uintptr_t p;
1045
1046	p = (uintptr_t)(note + 1);
1047	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1048
1049	desc = (const Elf32_Word *)p;
1050	if (desc[0] != GNULINUX_ABI_DESC)
1051		return (FALSE);
1052
1053	/*
1054	 * For linux we encode osrel as follows (see linux_mib.c):
1055	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1056	 */
1057	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1058
1059	return (TRUE);
1060}
1061
1062static Elf_Brandnote linux_brandnote = {
1063	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1064	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1065	.hdr.n_type	= 1,
1066	.vendor		= GNU_ABI_VENDOR,
1067	.flags		= BN_TRANSLATE_OSREL,
1068	.trans_osrel	= linux_trans_osrel
1069};
1070
1071static Elf32_Brandinfo linux_brand = {
1072	.brand		= ELFOSABI_LINUX,
1073	.machine	= EM_386,
1074	.compat_3_brand	= "Linux",
1075	.emul_path	= "/compat/linux",
1076	.interp_path	= "/lib/ld-linux.so.1",
1077	.sysvec		= &elf_linux_sysvec,
1078	.interp_newpath	= NULL,
1079	.brand_note	= &linux_brandnote,
1080	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1081};
1082
1083static Elf32_Brandinfo linux_glibc2brand = {
1084	.brand		= ELFOSABI_LINUX,
1085	.machine	= EM_386,
1086	.compat_3_brand	= "Linux",
1087	.emul_path	= "/compat/linux",
1088	.interp_path	= "/lib/ld-linux.so.2",
1089	.sysvec		= &elf_linux_sysvec,
1090	.interp_newpath	= NULL,
1091	.brand_note	= &linux_brandnote,
1092	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1093};
1094
1095Elf32_Brandinfo *linux_brandlist[] = {
1096	&linux_brand,
1097	&linux_glibc2brand,
1098	NULL
1099};
1100
1101static int
1102linux_elf_modevent(module_t mod, int type, void *data)
1103{
1104	Elf32_Brandinfo **brandinfo;
1105	int error;
1106	struct linux_ioctl_handler **lihp;
1107	struct linux_device_handler **ldhp;
1108
1109	error = 0;
1110
1111	switch(type) {
1112	case MOD_LOAD:
1113		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1114		     ++brandinfo)
1115			if (elf32_insert_brand_entry(*brandinfo) < 0)
1116				error = EINVAL;
1117		if (error == 0) {
1118			SET_FOREACH(lihp, linux_ioctl_handler_set)
1119				linux_ioctl_register_handler(*lihp);
1120			SET_FOREACH(ldhp, linux_device_handler_set)
1121				linux_device_register_handler(*ldhp);
1122			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1123			sx_init(&emul_shared_lock, "emuldata->shared lock");
1124			LIST_INIT(&futex_list);
1125			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1126			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1127			      NULL, 1000);
1128			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1129			      NULL, 1000);
1130			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1131			      NULL, 1000);
1132			linux_get_machine(&linux_platform);
1133			linux_szplatform = roundup(strlen(linux_platform) + 1,
1134			    sizeof(char *));
1135			linux_osd_jail_register();
1136			stclohz = (stathz ? stathz : hz);
1137			if (bootverbose)
1138				printf("Linux ELF exec handler installed\n");
1139		} else
1140			printf("cannot insert Linux ELF brand handler\n");
1141		break;
1142	case MOD_UNLOAD:
1143		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1144		     ++brandinfo)
1145			if (elf32_brand_inuse(*brandinfo))
1146				error = EBUSY;
1147		if (error == 0) {
1148			for (brandinfo = &linux_brandlist[0];
1149			     *brandinfo != NULL; ++brandinfo)
1150				if (elf32_remove_brand_entry(*brandinfo) < 0)
1151					error = EINVAL;
1152		}
1153		if (error == 0) {
1154			SET_FOREACH(lihp, linux_ioctl_handler_set)
1155				linux_ioctl_unregister_handler(*lihp);
1156			SET_FOREACH(ldhp, linux_device_handler_set)
1157				linux_device_unregister_handler(*ldhp);
1158			mtx_destroy(&emul_lock);
1159			sx_destroy(&emul_shared_lock);
1160			mtx_destroy(&futex_mtx);
1161			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1162			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1163			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1164			linux_osd_jail_deregister();
1165			if (bootverbose)
1166				printf("Linux ELF exec handler removed\n");
1167		} else
1168			printf("Could not deinstall ELF interpreter entry\n");
1169		break;
1170	default:
1171		return EOPNOTSUPP;
1172	}
1173	return error;
1174}
1175
1176static moduledata_t linux_elf_mod = {
1177	"linuxelf",
1178	linux_elf_modevent,
1179	0
1180};
1181
1182DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1183