linux_sysvec.c revision 246085
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 246085 2013-01-29 18:41:30Z jhb $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysent.h>
48#include <sys/sysproto.h>
49#include <sys/vnode.h>
50#include <sys/eventhandler.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_object.h>
57#include <vm/vm_page.h>
58#include <vm/vm_param.h>
59
60#include <machine/cpu.h>
61#include <machine/cputypes.h>
62#include <machine/md_var.h>
63#include <machine/pcb.h>
64
65#include <i386/linux/linux.h>
66#include <i386/linux/linux_proto.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_ioctl.h>
70#include <compat/linux/linux_mib.h>
71#include <compat/linux/linux_misc.h>
72#include <compat/linux/linux_signal.h>
73#include <compat/linux/linux_util.h>
74
75MODULE_VERSION(linux, 1);
76
77MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78
79#if BYTE_ORDER == LITTLE_ENDIAN
80#define SHELLMAGIC      0x2123 /* #! */
81#else
82#define SHELLMAGIC      0x2321
83#endif
84
/*
 * The sendsig functions are not syscalls, but they still want to use the
 * ldebug() facility.  Alias both of them to syscall 0, which is slightly
 * less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* User address of struct ps_strings, placed directly below LINUX_USRSTACK. */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))

/* Signal trampoline code and its size; both are defined outside this file. */
extern char linux_sigcode[];
extern int linux_szsigcode;
98
99extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
100
101SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
102SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
103
104static int	linux_fixup(register_t **stack_base,
105		    struct image_params *iparams);
106static int	elf_linux_fixup(register_t **stack_base,
107		    struct image_params *iparams);
108static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
109static void	exec_linux_setregs(struct thread *td,
110		    struct image_params *imgp, u_long stack);
111static register_t *linux_copyout_strings(struct image_params *imgp);
112static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
113
114static int linux_szplatform;
115const char *linux_platform;
116
117static eventhandler_tag linux_exit_tag;
118static eventhandler_tag linux_exec_tag;
119
120/*
121 * Linux syscalls return negative errno's, we do positive and map them
122 * Reference:
123 *   FreeBSD: src/sys/sys/errno.h
124 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
125 *            linux-2.6.17.8/include/asm-generic/errno.h
126 */
127static int bsd_to_linux_errno[ELAST + 1] = {
128	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
129	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
136	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
137	 -72, -67, -71
138};
139
140int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148	0, LINUX_SIGUSR1, LINUX_SIGUSR2
149};
150
151int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152	SIGHUP, SIGINT, SIGQUIT, SIGILL,
153	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159	SIGIO, SIGURG, SIGSYS
160};
161
/* Value reported to Linux when a FreeBSD trap type has no mapping. */
#define	LINUX_T_UNKNOWN		255

/*
 * FreeBSD trap type (T_*) -> Linux trap number.  The table is indexed by
 * the FreeBSD trap type; slots that are not annotated with a T_* name
 * yield LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[]: any code that does
 * not index into the table maps to LINUX_T_UNKNOWN.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
200
201/*
202 * If FreeBSD & Linux have a difference of opinion about what a trap
203 * means, deal with it here.
204 *
205 * MPSAFE
206 */
207static int
208translate_traps(int signal, int trap_code)
209{
210	if (signal != SIGBUS)
211		return signal;
212	switch (trap_code) {
213	case T_PROTFLT:
214	case T_TSSFLT:
215	case T_DOUBLEFLT:
216	case T_PAGEFLT:
217		return SIGSEGV;
218	default:
219		return signal;
220	}
221}
222
223static int
224linux_fixup(register_t **stack_base, struct image_params *imgp)
225{
226	register_t *argv, *envp;
227
228	argv = *stack_base;
229	envp = *stack_base + (imgp->args->argc + 1);
230	(*stack_base)--;
231	suword(*stack_base, (intptr_t)(void *)envp);
232	(*stack_base)--;
233	suword(*stack_base, (intptr_t)(void *)argv);
234	(*stack_base)--;
235	suword(*stack_base, imgp->args->argc);
236	return (0);
237}
238
239static int
240elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
241{
242	struct proc *p;
243	Elf32_Auxargs *args;
244	Elf32_Addr *uplatform;
245	struct ps_strings *arginfo;
246	register_t *pos;
247
248	KASSERT(curthread->td_proc == imgp->proc,
249	    ("unsafe elf_linux_fixup(), should be curproc"));
250
251	p = imgp->proc;
252	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
253	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254	args = (Elf32_Auxargs *)imgp->auxargs;
255	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
256
257	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
258
259	/*
260	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
261	 * as it has appeared in the 2.4.0-rc7 first time.
262	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
263	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
264	 * is not present.
265	 * Also see linux_times() implementation.
266	 */
267	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
268		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275	AUXARGS_ENTRY(pos, AT_BASE, args->base);
276	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
277	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
281	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
282	if (args->execfd != -1)
283		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284	AUXARGS_ENTRY(pos, AT_NULL, 0);
285
286	free(imgp->auxargs, M_TEMP);
287	imgp->auxargs = NULL;
288
289	(*stack_base)--;
290	suword(*stack_base, (register_t)imgp->args->argc);
291	return (0);
292}
293
294/*
295 * Copied from kern/kern_exec.c
296 */
297static register_t *
298linux_copyout_strings(struct image_params *imgp)
299{
300	int argc, envc;
301	char **vectp;
302	char *stringp, *destp;
303	register_t *stack_base;
304	struct ps_strings *arginfo;
305	struct proc *p;
306
307	/*
308	 * Calculate string base and vector table pointers.
309	 * Also deal with signal trampoline code for this exec type.
310	 */
311	p = imgp->proc;
312	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
313	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
314	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
315
316	/*
317	 * install LINUX_PLATFORM
318	 */
319	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
320	    linux_szplatform);
321
322	/*
323	 * If we have a valid auxargs ptr, prepare some room
324	 * on the stack.
325	 */
326	if (imgp->auxargs) {
327		/*
328		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
329		 * lower compatibility.
330		 */
331		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
332		    (LINUX_AT_COUNT * 2);
333		/*
334		 * The '+ 2' is for the null pointers at the end of each of
335		 * the arg and env vector sets,and imgp->auxarg_size is room
336		 * for argument of Runtime loader.
337		 */
338		vectp = (char **)(destp - (imgp->args->argc +
339		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
340	} else {
341		/*
342		 * The '+ 2' is for the null pointers at the end of each of
343		 * the arg and env vector sets
344		 */
345		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
346		    sizeof(char *));
347	}
348
349	/*
350	 * vectp also becomes our initial stack base
351	 */
352	stack_base = (register_t *)vectp;
353
354	stringp = imgp->args->begin_argv;
355	argc = imgp->args->argc;
356	envc = imgp->args->envc;
357
358	/*
359	 * Copy out strings - arguments and environment.
360	 */
361	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
362
363	/*
364	 * Fill in "ps_strings" struct for ps, w, etc.
365	 */
366	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
367	suword(&arginfo->ps_nargvstr, argc);
368
369	/*
370	 * Fill in argument portion of vector table.
371	 */
372	for (; argc > 0; --argc) {
373		suword(vectp++, (long)(intptr_t)destp);
374		while (*stringp++ != 0)
375			destp++;
376		destp++;
377	}
378
379	/* a null vector table pointer separates the argp's from the envp's */
380	suword(vectp++, 0);
381
382	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
383	suword(&arginfo->ps_nenvstr, envc);
384
385	/*
386	 * Fill in environment portion of vector table.
387	 */
388	for (; envc > 0; --envc) {
389		suword(vectp++, (long)(intptr_t)destp);
390		while (*stringp++ != 0)
391			destp++;
392		destp++;
393	}
394
395	/* end of vector table is a null pointer */
396	suword(vectp, 0);
397
398	return (stack_base);
399}
400
401
402
/* User code and data segment selectors loaded when a handler is entered. */
extern int _ucodesel;
extern int _udatasel;
/* Offset added to the trampoline base to enter an SA_SIGINFO handler. */
extern unsigned long linux_sznonrtsigcode;
405
406static void
407linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
408{
409	struct thread *td = curthread;
410	struct proc *p = td->td_proc;
411	struct sigacts *psp;
412	struct trapframe *regs;
413	struct l_rt_sigframe *fp, frame;
414	int sig, code;
415	int oonstack;
416
417	sig = ksi->ksi_signo;
418	code = ksi->ksi_code;
419	PROC_LOCK_ASSERT(p, MA_OWNED);
420	psp = p->p_sigacts;
421	mtx_assert(&psp->ps_mtx, MA_OWNED);
422	regs = td->td_frame;
423	oonstack = sigonstack(regs->tf_esp);
424
425#ifdef DEBUG
426	if (ldebug(rt_sendsig))
427		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
428		    catcher, sig, (void*)mask, code);
429#endif
430	/*
431	 * Allocate space for the signal handler context.
432	 */
433	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
434	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
435		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
436		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
437	} else
438		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
439	mtx_unlock(&psp->ps_mtx);
440
441	/*
442	 * Build the argument list for the signal handler.
443	 */
444	if (p->p_sysent->sv_sigtbl)
445		if (sig <= p->p_sysent->sv_sigsize)
446			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
447
448	bzero(&frame, sizeof(frame));
449
450	frame.sf_handler = catcher;
451	frame.sf_sig = sig;
452	frame.sf_siginfo = &fp->sf_si;
453	frame.sf_ucontext = &fp->sf_sc;
454
455	/* Fill in POSIX parts */
456	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
457
458	/*
459	 * Build the signal context to be used by sigreturn.
460	 */
461	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
462	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
463
464	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
465	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
466	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
467	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
468	PROC_UNLOCK(p);
469
470	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
471
472	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
473	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
474	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
475	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
476	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
477	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
478	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
479	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
480	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
481	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
482	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
483	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
484	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
485	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
486	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
487	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
488	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
489	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
490	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
491	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
492
493#ifdef DEBUG
494	if (ldebug(rt_sendsig))
495		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
496		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
497		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
498#endif
499
500	if (copyout(&frame, fp, sizeof(frame)) != 0) {
501		/*
502		 * Process has trashed its stack; give it an illegal
503		 * instruction to halt it in its tracks.
504		 */
505#ifdef DEBUG
506		if (ldebug(rt_sendsig))
507			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
508			    fp, oonstack);
509#endif
510		PROC_LOCK(p);
511		sigexit(td, SIGILL);
512	}
513
514	/*
515	 * Build context to run handler in.
516	 */
517	regs->tf_esp = (int)fp;
518	regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
519	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520	regs->tf_cs = _ucodesel;
521	regs->tf_ds = _udatasel;
522	regs->tf_es = _udatasel;
523	regs->tf_fs = _udatasel;
524	regs->tf_ss = _udatasel;
525	PROC_LOCK(p);
526	mtx_lock(&psp->ps_mtx);
527}
528
529
530/*
531 * Send an interrupt to process.
532 *
533 * Stack is set up to allow sigcode stored
534 * in u. to call routine, followed by kcall
535 * to sigreturn routine below.  After sigreturn
536 * resets the signal mask, the stack, and the
537 * frame pointer, it returns to the user
538 * specified pc, psl.
539 */
540static void
541linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
542{
543	struct thread *td = curthread;
544	struct proc *p = td->td_proc;
545	struct sigacts *psp;
546	struct trapframe *regs;
547	struct l_sigframe *fp, frame;
548	l_sigset_t lmask;
549	int sig, code;
550	int oonstack, i;
551
552	PROC_LOCK_ASSERT(p, MA_OWNED);
553	psp = p->p_sigacts;
554	sig = ksi->ksi_signo;
555	code = ksi->ksi_code;
556	mtx_assert(&psp->ps_mtx, MA_OWNED);
557	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
558		/* Signal handler installed with SA_SIGINFO. */
559		linux_rt_sendsig(catcher, ksi, mask);
560		return;
561	}
562	regs = td->td_frame;
563	oonstack = sigonstack(regs->tf_esp);
564
565#ifdef DEBUG
566	if (ldebug(sendsig))
567		printf(ARGS(sendsig, "%p, %d, %p, %u"),
568		    catcher, sig, (void*)mask, code);
569#endif
570
571	/*
572	 * Allocate space for the signal handler context.
573	 */
574	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
575	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
576		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
577		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
578	} else
579		fp = (struct l_sigframe *)regs->tf_esp - 1;
580	mtx_unlock(&psp->ps_mtx);
581	PROC_UNLOCK(p);
582
583	/*
584	 * Build the argument list for the signal handler.
585	 */
586	if (p->p_sysent->sv_sigtbl)
587		if (sig <= p->p_sysent->sv_sigsize)
588			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
589
590	bzero(&frame, sizeof(frame));
591
592	frame.sf_handler = catcher;
593	frame.sf_sig = sig;
594
595	bsd_to_linux_sigset(mask, &lmask);
596
597	/*
598	 * Build the signal context to be used by sigreturn.
599	 */
600	frame.sf_sc.sc_mask   = lmask.__bits[0];
601	frame.sf_sc.sc_gs     = rgs();
602	frame.sf_sc.sc_fs     = regs->tf_fs;
603	frame.sf_sc.sc_es     = regs->tf_es;
604	frame.sf_sc.sc_ds     = regs->tf_ds;
605	frame.sf_sc.sc_edi    = regs->tf_edi;
606	frame.sf_sc.sc_esi    = regs->tf_esi;
607	frame.sf_sc.sc_ebp    = regs->tf_ebp;
608	frame.sf_sc.sc_ebx    = regs->tf_ebx;
609	frame.sf_sc.sc_edx    = regs->tf_edx;
610	frame.sf_sc.sc_ecx    = regs->tf_ecx;
611	frame.sf_sc.sc_eax    = regs->tf_eax;
612	frame.sf_sc.sc_eip    = regs->tf_eip;
613	frame.sf_sc.sc_cs     = regs->tf_cs;
614	frame.sf_sc.sc_eflags = regs->tf_eflags;
615	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
616	frame.sf_sc.sc_ss     = regs->tf_ss;
617	frame.sf_sc.sc_err    = regs->tf_err;
618	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
619	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
620
621	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622		frame.sf_extramask[i] = lmask.__bits[i+1];
623
624	if (copyout(&frame, fp, sizeof(frame)) != 0) {
625		/*
626		 * Process has trashed its stack; give it an illegal
627		 * instruction to halt it in its tracks.
628		 */
629		PROC_LOCK(p);
630		sigexit(td, SIGILL);
631	}
632
633	/*
634	 * Build context to run handler in.
635	 */
636	regs->tf_esp = (int)fp;
637	regs->tf_eip = p->p_sysent->sv_sigcode_base;
638	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
639	regs->tf_cs = _ucodesel;
640	regs->tf_ds = _udatasel;
641	regs->tf_es = _udatasel;
642	regs->tf_fs = _udatasel;
643	regs->tf_ss = _udatasel;
644	PROC_LOCK(p);
645	mtx_lock(&psp->ps_mtx);
646}
647
648/*
649 * System call to cleanup state after a signal
650 * has been taken.  Reset signal mask and
651 * stack state from context left by sendsig (above).
652 * Return to previous pc and psl as specified by
653 * context left by sendsig. Check carefully to
654 * make sure that the user has not modified the
655 * psl to gain improper privileges or to cause
656 * a machine fault.
657 */
658int
659linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660{
661	struct l_sigframe frame;
662	struct trapframe *regs;
663	l_sigset_t lmask;
664	sigset_t bmask;
665	int eflags, i;
666	ksiginfo_t ksi;
667
668	regs = td->td_frame;
669
670#ifdef DEBUG
671	if (ldebug(sigreturn))
672		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
673#endif
674	/*
675	 * The trampoline code hands us the sigframe.
676	 * It is unsafe to keep track of it ourselves, in the event that a
677	 * program jumps out of a signal handler.
678	 */
679	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
680		return (EFAULT);
681
682	/*
683	 * Check for security violations.
684	 */
685#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686	eflags = frame.sf_sc.sc_eflags;
687	/*
688	 * XXX do allow users to change the privileged flag PSL_RF.  The
689	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690	 * sometimes set it there too.  tf_eflags is kept in the signal
691	 * context during signal handling and there is no other place
692	 * to remember it, so the PSL_RF bit may be corrupted by the
693	 * signal handler without us knowing.  Corruption of the PSL_RF
694	 * bit at worst causes one more or one less debugger trap, so
695	 * allowing it is fairly harmless.
696	 */
697	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
698		return(EINVAL);
699
700	/*
701	 * Don't allow users to load a valid privileged %cs.  Let the
702	 * hardware check for invalid selectors, excess privilege in
703	 * other selectors, invalid %eip's and invalid %esp's.
704	 */
705#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707		ksiginfo_init_trap(&ksi);
708		ksi.ksi_signo = SIGBUS;
709		ksi.ksi_code = BUS_OBJERR;
710		ksi.ksi_trapno = T_PROTFLT;
711		ksi.ksi_addr = (void *)regs->tf_eip;
712		trapsignal(td, &ksi);
713		return(EINVAL);
714	}
715
716	lmask.__bits[0] = frame.sf_sc.sc_mask;
717	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718		lmask.__bits[i+1] = frame.sf_extramask[i];
719	linux_to_bsd_sigset(&lmask, &bmask);
720	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
721
722	/*
723	 * Restore signal context.
724	 */
725	/* %gs was restored by the trampoline. */
726	regs->tf_fs     = frame.sf_sc.sc_fs;
727	regs->tf_es     = frame.sf_sc.sc_es;
728	regs->tf_ds     = frame.sf_sc.sc_ds;
729	regs->tf_edi    = frame.sf_sc.sc_edi;
730	regs->tf_esi    = frame.sf_sc.sc_esi;
731	regs->tf_ebp    = frame.sf_sc.sc_ebp;
732	regs->tf_ebx    = frame.sf_sc.sc_ebx;
733	regs->tf_edx    = frame.sf_sc.sc_edx;
734	regs->tf_ecx    = frame.sf_sc.sc_ecx;
735	regs->tf_eax    = frame.sf_sc.sc_eax;
736	regs->tf_eip    = frame.sf_sc.sc_eip;
737	regs->tf_cs     = frame.sf_sc.sc_cs;
738	regs->tf_eflags = eflags;
739	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
740	regs->tf_ss     = frame.sf_sc.sc_ss;
741
742	return (EJUSTRETURN);
743}
744
745/*
746 * System call to cleanup state after a signal
747 * has been taken.  Reset signal mask and
748 * stack state from context left by rt_sendsig (above).
749 * Return to previous pc and psl as specified by
750 * context left by sendsig. Check carefully to
751 * make sure that the user has not modified the
752 * psl to gain improper privileges or to cause
753 * a machine fault.
754 */
755int
756linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
757{
758	struct l_ucontext uc;
759	struct l_sigcontext *context;
760	sigset_t bmask;
761	l_stack_t *lss;
762	stack_t ss;
763	struct trapframe *regs;
764	int eflags;
765	ksiginfo_t ksi;
766
767	regs = td->td_frame;
768
769#ifdef DEBUG
770	if (ldebug(rt_sigreturn))
771		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
772#endif
773	/*
774	 * The trampoline code hands us the ucontext.
775	 * It is unsafe to keep track of it ourselves, in the event that a
776	 * program jumps out of a signal handler.
777	 */
778	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
779		return (EFAULT);
780
781	context = &uc.uc_mcontext;
782
783	/*
784	 * Check for security violations.
785	 */
786#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
787	eflags = context->sc_eflags;
788	/*
789	 * XXX do allow users to change the privileged flag PSL_RF.  The
790	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
791	 * sometimes set it there too.  tf_eflags is kept in the signal
792	 * context during signal handling and there is no other place
793	 * to remember it, so the PSL_RF bit may be corrupted by the
794	 * signal handler without us knowing.  Corruption of the PSL_RF
795	 * bit at worst causes one more or one less debugger trap, so
796	 * allowing it is fairly harmless.
797	 */
798	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
799		return(EINVAL);
800
801	/*
802	 * Don't allow users to load a valid privileged %cs.  Let the
803	 * hardware check for invalid selectors, excess privilege in
804	 * other selectors, invalid %eip's and invalid %esp's.
805	 */
806#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
807	if (!CS_SECURE(context->sc_cs)) {
808		ksiginfo_init_trap(&ksi);
809		ksi.ksi_signo = SIGBUS;
810		ksi.ksi_code = BUS_OBJERR;
811		ksi.ksi_trapno = T_PROTFLT;
812		ksi.ksi_addr = (void *)regs->tf_eip;
813		trapsignal(td, &ksi);
814		return(EINVAL);
815	}
816
817	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
818	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
819
820	/*
821	 * Restore signal context
822	 */
823	/* %gs was restored by the trampoline. */
824	regs->tf_fs     = context->sc_fs;
825	regs->tf_es     = context->sc_es;
826	regs->tf_ds     = context->sc_ds;
827	regs->tf_edi    = context->sc_edi;
828	regs->tf_esi    = context->sc_esi;
829	regs->tf_ebp    = context->sc_ebp;
830	regs->tf_ebx    = context->sc_ebx;
831	regs->tf_edx    = context->sc_edx;
832	regs->tf_ecx    = context->sc_ecx;
833	regs->tf_eax    = context->sc_eax;
834	regs->tf_eip    = context->sc_eip;
835	regs->tf_cs     = context->sc_cs;
836	regs->tf_eflags = eflags;
837	regs->tf_esp    = context->sc_esp_at_signal;
838	regs->tf_ss     = context->sc_ss;
839
840	/*
841	 * call sigaltstack & ignore results..
842	 */
843	lss = &uc.uc_stack;
844	ss.ss_sp = lss->ss_sp;
845	ss.ss_size = lss->ss_size;
846	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
847
848#ifdef DEBUG
849	if (ldebug(rt_sigreturn))
850		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
851		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
852#endif
853	(void)kern_sigaltstack(td, &ss, NULL);
854
855	return (EJUSTRETURN);
856}
857
858static int
859linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
860{
861	struct proc *p;
862	struct trapframe *frame;
863
864	p = td->td_proc;
865	frame = td->td_frame;
866
867	sa->code = frame->tf_eax;
868	sa->args[0] = frame->tf_ebx;
869	sa->args[1] = frame->tf_ecx;
870	sa->args[2] = frame->tf_edx;
871	sa->args[3] = frame->tf_esi;
872	sa->args[4] = frame->tf_edi;
873	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
874
875	if (sa->code >= p->p_sysent->sv_size)
876		sa->callp = &p->p_sysent->sv_table[0];
877 	else
878 		sa->callp = &p->p_sysent->sv_table[sa->code];
879	sa->narg = sa->callp->sy_narg;
880
881	td->td_retval[0] = 0;
882	td->td_retval[1] = frame->tf_edx;
883
884	return (0);
885}
886
887/*
888 * If a linux binary is exec'ing something, try this image activator
889 * first.  We override standard shell script execution in order to
890 * be able to modify the interpreter path.  We only do this if a linux
891 * binary is doing the exec, so we do not create an EXEC module for it.
892 */
893static int	exec_linux_imgact_try(struct image_params *iparams);
894
895static int
896exec_linux_imgact_try(struct image_params *imgp)
897{
898    const char *head = (const char *)imgp->image_header;
899    char *rpath;
900    int error = -1;
901
902    /*
903     * The interpreter for shell scripts run from a linux binary needs
904     * to be located in /compat/linux if possible in order to recursively
905     * maintain linux path emulation.
906     */
907    if (((const short *)head)[0] == SHELLMAGIC) {
908	    /*
909	     * Run our normal shell image activator.  If it succeeds attempt
910	     * to use the alternate path for the interpreter.  If an alternate
911	     * path is found, use our stringspace to store it.
912	     */
913	    if ((error = exec_shell_imgact(imgp)) == 0) {
914		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
915			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
916		    if (rpath != NULL)
917			    imgp->args->fname_buf =
918				imgp->interpreter_name = rpath;
919	    }
920    }
921    return (error);
922}
923
924/*
925 * exec_setregs may initialize some registers differently than Linux
926 * does, thus potentially confusing Linux binaries. If necessary, we
927 * override the exec_setregs default(s) here.
928 */
929static void
930exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
931{
932	struct pcb *pcb = td->td_pcb;
933
934	exec_setregs(td, imgp, stack);
935
936	/* Linux sets %gs to 0, we default to _udatasel */
937	pcb->pcb_gs = 0;
938	load_gs(0);
939
940	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
941}
942
943static void
944linux_get_machine(const char **dst)
945{
946
947	switch (cpu_class) {
948	case CPUCLASS_686:
949		*dst = "i686";
950		break;
951	case CPUCLASS_586:
952		*dst = "i586";
953		break;
954	case CPUCLASS_486:
955		*dst = "i486";
956		break;
957	default:
958		*dst = "i386";
959	}
960}
961
962struct sysentvec linux_sysvec = {
963	.sv_size	= LINUX_SYS_MAXSYSCALL,
964	.sv_table	= linux_sysent,
965	.sv_mask	= 0,
966	.sv_sigsize	= LINUX_SIGTBLSZ,
967	.sv_sigtbl	= bsd_to_linux_signal,
968	.sv_errsize	= ELAST + 1,
969	.sv_errtbl	= bsd_to_linux_errno,
970	.sv_transtrap	= translate_traps,
971	.sv_fixup	= linux_fixup,
972	.sv_sendsig	= linux_sendsig,
973	.sv_sigcode	= linux_sigcode,
974	.sv_szsigcode	= &linux_szsigcode,
975	.sv_prepsyscall	= NULL,
976	.sv_name	= "Linux a.out",
977	.sv_coredump	= NULL,
978	.sv_imgact_try	= exec_linux_imgact_try,
979	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
980	.sv_pagesize	= PAGE_SIZE,
981	.sv_minuser	= VM_MIN_ADDRESS,
982	.sv_maxuser	= VM_MAXUSER_ADDRESS,
983	.sv_usrstack	= LINUX_USRSTACK,
984	.sv_psstrings	= PS_STRINGS,
985	.sv_stackprot	= VM_PROT_ALL,
986	.sv_copyout_strings = exec_copyout_strings,
987	.sv_setregs	= exec_linux_setregs,
988	.sv_fixlimit	= NULL,
989	.sv_maxssiz	= NULL,
990	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
991	.sv_set_syscall_retval = cpu_set_syscall_retval,
992	.sv_fetch_syscall_args = linux_fetch_syscall_args,
993	.sv_syscallnames = NULL,
994	.sv_shared_page_base = LINUX_SHAREDPAGE,
995	.sv_shared_page_len = PAGE_SIZE,
996	.sv_schedtail	= linux_schedtail,
997};
998INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
999
1000struct sysentvec elf_linux_sysvec = {
1001	.sv_size	= LINUX_SYS_MAXSYSCALL,
1002	.sv_table	= linux_sysent,
1003	.sv_mask	= 0,
1004	.sv_sigsize	= LINUX_SIGTBLSZ,
1005	.sv_sigtbl	= bsd_to_linux_signal,
1006	.sv_errsize	= ELAST + 1,
1007	.sv_errtbl	= bsd_to_linux_errno,
1008	.sv_transtrap	= translate_traps,
1009	.sv_fixup	= elf_linux_fixup,
1010	.sv_sendsig	= linux_sendsig,
1011	.sv_sigcode	= linux_sigcode,
1012	.sv_szsigcode	= &linux_szsigcode,
1013	.sv_prepsyscall	= NULL,
1014	.sv_name	= "Linux ELF",
1015	.sv_coredump	= elf32_coredump,
1016	.sv_imgact_try	= exec_linux_imgact_try,
1017	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1018	.sv_pagesize	= PAGE_SIZE,
1019	.sv_minuser	= VM_MIN_ADDRESS,
1020	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1021	.sv_usrstack	= LINUX_USRSTACK,
1022	.sv_psstrings	= LINUX_PS_STRINGS,
1023	.sv_stackprot	= VM_PROT_ALL,
1024	.sv_copyout_strings = linux_copyout_strings,
1025	.sv_setregs	= exec_linux_setregs,
1026	.sv_fixlimit	= NULL,
1027	.sv_maxssiz	= NULL,
1028	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1029	.sv_set_syscall_retval = cpu_set_syscall_retval,
1030	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1031	.sv_syscallnames = NULL,
1032	.sv_shared_page_base = LINUX_SHAREDPAGE,
1033	.sv_shared_page_len = PAGE_SIZE,
1034	.sv_schedtail	= linux_schedtail,
1035};
1036INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1037
/*
 * Vendor name used for the Linux brand note; sizeof() of this array
 * (including the terminating NUL) supplies the note's n_namesz.
 * Read-only, hence const.
 */
static const char GNU_ABI_VENDOR[] = "GNU";
/* Value that the first descriptor word of the note must hold. */
static const int GNULINUX_ABI_DESC = 0;
1040
1041static boolean_t
1042linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1043{
1044	const Elf32_Word *desc;
1045	uintptr_t p;
1046
1047	p = (uintptr_t)(note + 1);
1048	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1049
1050	desc = (const Elf32_Word *)p;
1051	if (desc[0] != GNULINUX_ABI_DESC)
1052		return (FALSE);
1053
1054	/*
1055	 * For linux we encode osrel as follows (see linux_mib.c):
1056	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1057	 */
1058	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1059
1060	return (TRUE);
1061}
1062
1063static Elf_Brandnote linux_brandnote = {
1064	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1065	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1066	.hdr.n_type	= 1,
1067	.vendor		= GNU_ABI_VENDOR,
1068	.flags		= BN_TRANSLATE_OSREL,
1069	.trans_osrel	= linux_trans_osrel
1070};
1071
1072static Elf32_Brandinfo linux_brand = {
1073	.brand		= ELFOSABI_LINUX,
1074	.machine	= EM_386,
1075	.compat_3_brand	= "Linux",
1076	.emul_path	= "/compat/linux",
1077	.interp_path	= "/lib/ld-linux.so.1",
1078	.sysvec		= &elf_linux_sysvec,
1079	.interp_newpath	= NULL,
1080	.brand_note	= &linux_brandnote,
1081	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1082};
1083
1084static Elf32_Brandinfo linux_glibc2brand = {
1085	.brand		= ELFOSABI_LINUX,
1086	.machine	= EM_386,
1087	.compat_3_brand	= "Linux",
1088	.emul_path	= "/compat/linux",
1089	.interp_path	= "/lib/ld-linux.so.2",
1090	.sysvec		= &elf_linux_sysvec,
1091	.interp_newpath	= NULL,
1092	.brand_note	= &linux_brandnote,
1093	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1094};
1095
1096Elf32_Brandinfo *linux_brandlist[] = {
1097	&linux_brand,
1098	&linux_glibc2brand,
1099	NULL
1100};
1101
1102static int
1103linux_elf_modevent(module_t mod, int type, void *data)
1104{
1105	Elf32_Brandinfo **brandinfo;
1106	int error;
1107	struct linux_ioctl_handler **lihp;
1108	struct linux_device_handler **ldhp;
1109
1110	error = 0;
1111
1112	switch(type) {
1113	case MOD_LOAD:
1114		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1115		     ++brandinfo)
1116			if (elf32_insert_brand_entry(*brandinfo) < 0)
1117				error = EINVAL;
1118		if (error == 0) {
1119			SET_FOREACH(lihp, linux_ioctl_handler_set)
1120				linux_ioctl_register_handler(*lihp);
1121			SET_FOREACH(ldhp, linux_device_handler_set)
1122				linux_device_register_handler(*ldhp);
1123			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1124			sx_init(&emul_shared_lock, "emuldata->shared lock");
1125			LIST_INIT(&futex_list);
1126			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1127			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1128			      NULL, 1000);
1129			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1130			      NULL, 1000);
1131			linux_get_machine(&linux_platform);
1132			linux_szplatform = roundup(strlen(linux_platform) + 1,
1133			    sizeof(char *));
1134			linux_osd_jail_register();
1135			stclohz = (stathz ? stathz : hz);
1136			if (bootverbose)
1137				printf("Linux ELF exec handler installed\n");
1138		} else
1139			printf("cannot insert Linux ELF brand handler\n");
1140		break;
1141	case MOD_UNLOAD:
1142		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143		     ++brandinfo)
1144			if (elf32_brand_inuse(*brandinfo))
1145				error = EBUSY;
1146		if (error == 0) {
1147			for (brandinfo = &linux_brandlist[0];
1148			     *brandinfo != NULL; ++brandinfo)
1149				if (elf32_remove_brand_entry(*brandinfo) < 0)
1150					error = EINVAL;
1151		}
1152		if (error == 0) {
1153			SET_FOREACH(lihp, linux_ioctl_handler_set)
1154				linux_ioctl_unregister_handler(*lihp);
1155			SET_FOREACH(ldhp, linux_device_handler_set)
1156				linux_device_unregister_handler(*ldhp);
1157			mtx_destroy(&emul_lock);
1158			sx_destroy(&emul_shared_lock);
1159			mtx_destroy(&futex_mtx);
1160			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1161			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1162			linux_osd_jail_deregister();
1163			if (bootverbose)
1164				printf("Linux ELF exec handler removed\n");
1165		} else
1166			printf("Could not deinstall ELF interpreter entry\n");
1167		break;
1168	default:
1169		return EOPNOTSUPP;
1170	}
1171	return error;
1172}
1173
1174static moduledata_t linux_elf_mod = {
1175	"linuxelf",
1176	linux_elf_modevent,
1177	0
1178};
1179
1180DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1181