1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer
14 *    in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD$");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define	__ELF_WORD_SIZE	32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86
87MODULE_VERSION(linux, 1);
88
89MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
90
91#define	AUXARGS_ENTRY_32(pos, id, val)	\
92	do {				\
93		suword32(pos++, id);	\
94		suword32(pos++, val);	\
95	} while (0)
96
97#if BYTE_ORDER == LITTLE_ENDIAN
98#define SHELLMAGIC      0x2123 /* #! */
99#else
100#define SHELLMAGIC      0x2321
101#endif
102
103/*
104 * Allow the sendsig functions to use the ldebug() facility
105 * even though they are not syscalls themselves. Map them
106 * to syscall 0. This is slightly less bogus than using
107 * ldebug(sigreturn).
108 */
109#define	LINUX_SYS_linux_rt_sendsig	0
110#define	LINUX_SYS_linux_sendsig		0
111
112const char *linux_platform = "i686";
113static int linux_szplatform;
114extern char linux_sigcode[];
115extern int linux_szsigcode;
116
117extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
118
119SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
120SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
121
122static int	elf_linux_fixup(register_t **stack_base,
123		    struct image_params *iparams);
124static register_t *linux_copyout_strings(struct image_params *imgp);
125static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126static void	exec_linux_setregs(struct thread *td,
127				   struct image_params *imgp, u_long stack);
128static void	linux32_fixlimit(struct rlimit *rl, int which);
129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130
131static eventhandler_tag linux_exit_tag;
132static eventhandler_tag linux_exec_tag;
133
134/*
135 * Linux syscalls return negative errno's, we do positive and map them
136 * Reference:
137 *   FreeBSD: src/sys/sys/errno.h
138 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
139 *            linux-2.6.17.8/include/asm-generic/errno.h
140 */
141static int bsd_to_linux_errno[ELAST + 1] = {
142	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
143	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
150	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
151	 -72, -67, -71
152};
153
154int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162	0, LINUX_SIGUSR1, LINUX_SIGUSR2
163};
164
165int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166	SIGHUP, SIGINT, SIGQUIT, SIGILL,
167	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173	SIGIO, SIGURG, SIGSYS
174};
175
/* Value reported for any FreeBSD trap code that has no mapping below. */
#define LINUX_T_UNKNOWN  255

/* FreeBSD trap code (array index) -> Linux trap number. */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Bounds-checked lookup: codes past the end of the table yield
 * LINUX_T_UNKNOWN.  The comparison is done against an unsigned size, so
 * a negative int argument also falls into the "unknown" arm.
 */
#define bsd_to_linux_trapcode(code)					\
    ((code) <								\
	sizeof(_bsd_to_linux_trapcode) / sizeof(*_bsd_to_linux_trapcode) ? \
     _bsd_to_linux_trapcode[(code)] :					\
     LINUX_T_UNKNOWN)
214
215struct linux32_ps_strings {
216	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
217	u_int ps_nargvstr;	/* the number of argument strings */
218	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
219	u_int ps_nenvstr;	/* the number of environment strings */
220};
221
222/*
223 * If FreeBSD & Linux have a difference of opinion about what a trap
224 * means, deal with it here.
225 *
226 * MPSAFE
227 */
228static int
229translate_traps(int signal, int trap_code)
230{
231	if (signal != SIGBUS)
232		return signal;
233	switch (trap_code) {
234	case T_PROTFLT:
235	case T_TSSFLT:
236	case T_DOUBLEFLT:
237	case T_PAGEFLT:
238		return SIGSEGV;
239	default:
240		return signal;
241	}
242}
243
244static int
245elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246{
247	Elf32_Auxargs *args;
248	Elf32_Addr *base;
249	Elf32_Addr *pos, *uplatform;
250	struct linux32_ps_strings *arginfo;
251
252	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
253	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254
255	KASSERT(curthread->td_proc == imgp->proc,
256	    ("unsafe elf_linux_fixup(), should be curproc"));
257	base = (Elf32_Addr *)*stack_base;
258	args = (Elf32_Auxargs *)imgp->auxargs;
259	pos = base + (imgp->args->argc + imgp->args->envc + 2);
260
261	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
262
263	/*
264	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
265	 * as it has appeared in the 2.4.0-rc7 first time.
266	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
267	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
268	 * is not present.
269	 * Also see linux_times() implementation.
270	 */
271	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
272		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
273	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
274	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
275	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
276	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
277	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
278	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
279	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
280	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
281	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
282	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
283	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
284	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
285	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
286	if (args->execfd != -1)
287		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
288	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
289
290	free(imgp->auxargs, M_TEMP);
291	imgp->auxargs = NULL;
292
293	base--;
294	suword32(base, (uint32_t)imgp->args->argc);
295	*stack_base = (register_t *)base;
296	return 0;
297}
298
299extern unsigned long linux_sznonrtsigcode;
300
301static void
302linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
303{
304	struct thread *td = curthread;
305	struct proc *p = td->td_proc;
306	struct sigacts *psp;
307	struct trapframe *regs;
308	struct l_rt_sigframe *fp, frame;
309	int oonstack;
310	int sig;
311	int code;
312
313	sig = ksi->ksi_signo;
314	code = ksi->ksi_code;
315	PROC_LOCK_ASSERT(p, MA_OWNED);
316	psp = p->p_sigacts;
317	mtx_assert(&psp->ps_mtx, MA_OWNED);
318	regs = td->td_frame;
319	oonstack = sigonstack(regs->tf_rsp);
320
321#ifdef DEBUG
322	if (ldebug(rt_sendsig))
323		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
324		    catcher, sig, (void*)mask, code);
325#endif
326	/*
327	 * Allocate space for the signal handler context.
328	 */
329	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
330	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
331		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
332		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
333	} else
334		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
335	mtx_unlock(&psp->ps_mtx);
336
337	/*
338	 * Build the argument list for the signal handler.
339	 */
340	if (p->p_sysent->sv_sigtbl)
341		if (sig <= p->p_sysent->sv_sigsize)
342			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
343
344	bzero(&frame, sizeof(frame));
345
346	frame.sf_handler = PTROUT(catcher);
347	frame.sf_sig = sig;
348	frame.sf_siginfo = PTROUT(&fp->sf_si);
349	frame.sf_ucontext = PTROUT(&fp->sf_sc);
350
351	/* Fill in POSIX parts */
352	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
353
354	/*
355	 * Build the signal context to be used by sigreturn.
356	 */
357	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
358	frame.sf_sc.uc_link = 0;		/* XXX ??? */
359
360	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
361	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
362	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
363	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
364	PROC_UNLOCK(p);
365
366	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
367
368	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
369	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
370	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
371	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
372	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
373	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
374	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
375	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
376	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
377	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
378	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
379	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
380	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
381	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
382	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
383	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
384	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
385	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
386	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
387	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
388
389#ifdef DEBUG
390	if (ldebug(rt_sendsig))
391		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
392		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
393		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
394#endif
395
396	if (copyout(&frame, fp, sizeof(frame)) != 0) {
397		/*
398		 * Process has trashed its stack; give it an illegal
399		 * instruction to halt it in its tracks.
400		 */
401#ifdef DEBUG
402		if (ldebug(rt_sendsig))
403			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
404			    fp, oonstack);
405#endif
406		PROC_LOCK(p);
407		sigexit(td, SIGILL);
408	}
409
410	/*
411	 * Build context to run handler in.
412	 */
413	regs->tf_rsp = PTROUT(fp);
414	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
415	regs->tf_rflags &= ~(PSL_T | PSL_D);
416	regs->tf_cs = _ucode32sel;
417	regs->tf_ss = _udatasel;
418	regs->tf_ds = _udatasel;
419	regs->tf_es = _udatasel;
420	regs->tf_fs = _ufssel;
421	regs->tf_gs = _ugssel;
422	regs->tf_flags = TF_HASSEGS;
423	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
424	PROC_LOCK(p);
425	mtx_lock(&psp->ps_mtx);
426}
427
428
429/*
430 * Send an interrupt to process.
431 *
432 * Stack is set up to allow sigcode stored
433 * in u. to call routine, followed by kcall
434 * to sigreturn routine below.  After sigreturn
435 * resets the signal mask, the stack, and the
436 * frame pointer, it returns to the user
437 * specified pc, psl.
438 */
439static void
440linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
441{
442	struct thread *td = curthread;
443	struct proc *p = td->td_proc;
444	struct sigacts *psp;
445	struct trapframe *regs;
446	struct l_sigframe *fp, frame;
447	l_sigset_t lmask;
448	int oonstack, i;
449	int sig, code;
450
451	sig = ksi->ksi_signo;
452	code = ksi->ksi_code;
453	PROC_LOCK_ASSERT(p, MA_OWNED);
454	psp = p->p_sigacts;
455	mtx_assert(&psp->ps_mtx, MA_OWNED);
456	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
457		/* Signal handler installed with SA_SIGINFO. */
458		linux_rt_sendsig(catcher, ksi, mask);
459		return;
460	}
461
462	regs = td->td_frame;
463	oonstack = sigonstack(regs->tf_rsp);
464
465#ifdef DEBUG
466	if (ldebug(sendsig))
467		printf(ARGS(sendsig, "%p, %d, %p, %u"),
468		    catcher, sig, (void*)mask, code);
469#endif
470
471	/*
472	 * Allocate space for the signal handler context.
473	 */
474	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
475	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
476		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
477		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
478	} else
479		fp = (struct l_sigframe *)regs->tf_rsp - 1;
480	mtx_unlock(&psp->ps_mtx);
481	PROC_UNLOCK(p);
482
483	/*
484	 * Build the argument list for the signal handler.
485	 */
486	if (p->p_sysent->sv_sigtbl)
487		if (sig <= p->p_sysent->sv_sigsize)
488			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
489
490	bzero(&frame, sizeof(frame));
491
492	frame.sf_handler = PTROUT(catcher);
493	frame.sf_sig = sig;
494
495	bsd_to_linux_sigset(mask, &lmask);
496
497	/*
498	 * Build the signal context to be used by sigreturn.
499	 */
500	frame.sf_sc.sc_mask   = lmask.__bits[0];
501	frame.sf_sc.sc_gs     = regs->tf_gs;
502	frame.sf_sc.sc_fs     = regs->tf_fs;
503	frame.sf_sc.sc_es     = regs->tf_es;
504	frame.sf_sc.sc_ds     = regs->tf_ds;
505	frame.sf_sc.sc_edi    = regs->tf_rdi;
506	frame.sf_sc.sc_esi    = regs->tf_rsi;
507	frame.sf_sc.sc_ebp    = regs->tf_rbp;
508	frame.sf_sc.sc_ebx    = regs->tf_rbx;
509	frame.sf_sc.sc_edx    = regs->tf_rdx;
510	frame.sf_sc.sc_ecx    = regs->tf_rcx;
511	frame.sf_sc.sc_eax    = regs->tf_rax;
512	frame.sf_sc.sc_eip    = regs->tf_rip;
513	frame.sf_sc.sc_cs     = regs->tf_cs;
514	frame.sf_sc.sc_eflags = regs->tf_rflags;
515	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
516	frame.sf_sc.sc_ss     = regs->tf_ss;
517	frame.sf_sc.sc_err    = regs->tf_err;
518	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
519	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
520
521	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
522		frame.sf_extramask[i] = lmask.__bits[i+1];
523
524	if (copyout(&frame, fp, sizeof(frame)) != 0) {
525		/*
526		 * Process has trashed its stack; give it an illegal
527		 * instruction to halt it in its tracks.
528		 */
529		PROC_LOCK(p);
530		sigexit(td, SIGILL);
531	}
532
533	/*
534	 * Build context to run handler in.
535	 */
536	regs->tf_rsp = PTROUT(fp);
537	regs->tf_rip = p->p_sysent->sv_sigcode_base;
538	regs->tf_rflags &= ~(PSL_T | PSL_D);
539	regs->tf_cs = _ucode32sel;
540	regs->tf_ss = _udatasel;
541	regs->tf_ds = _udatasel;
542	regs->tf_es = _udatasel;
543	regs->tf_fs = _ufssel;
544	regs->tf_gs = _ugssel;
545	regs->tf_flags = TF_HASSEGS;
546	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
547	PROC_LOCK(p);
548	mtx_lock(&psp->ps_mtx);
549}
550
551/*
552 * System call to cleanup state after a signal
553 * has been taken.  Reset signal mask and
554 * stack state from context left by sendsig (above).
555 * Return to previous pc and psl as specified by
556 * context left by sendsig. Check carefully to
557 * make sure that the user has not modified the
558 * psl to gain improper privileges or to cause
559 * a machine fault.
560 */
561int
562linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
563{
564	struct l_sigframe frame;
565	struct trapframe *regs;
566	sigset_t bmask;
567	l_sigset_t lmask;
568	int eflags, i;
569	ksiginfo_t ksi;
570
571	regs = td->td_frame;
572
573#ifdef DEBUG
574	if (ldebug(sigreturn))
575		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
576#endif
577	/*
578	 * The trampoline code hands us the sigframe.
579	 * It is unsafe to keep track of it ourselves, in the event that a
580	 * program jumps out of a signal handler.
581	 */
582	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
583		return (EFAULT);
584
585	/*
586	 * Check for security violations.
587	 */
588#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
589	eflags = frame.sf_sc.sc_eflags;
590	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
591		return(EINVAL);
592
593	/*
594	 * Don't allow users to load a valid privileged %cs.  Let the
595	 * hardware check for invalid selectors, excess privilege in
596	 * other selectors, invalid %eip's and invalid %esp's.
597	 */
598#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
599	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
600		ksiginfo_init_trap(&ksi);
601		ksi.ksi_signo = SIGBUS;
602		ksi.ksi_code = BUS_OBJERR;
603		ksi.ksi_trapno = T_PROTFLT;
604		ksi.ksi_addr = (void *)regs->tf_rip;
605		trapsignal(td, &ksi);
606		return(EINVAL);
607	}
608
609	lmask.__bits[0] = frame.sf_sc.sc_mask;
610	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
611		lmask.__bits[i+1] = frame.sf_extramask[i];
612	linux_to_bsd_sigset(&lmask, &bmask);
613	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
614
615	/*
616	 * Restore signal context.
617	 */
618	regs->tf_rdi    = frame.sf_sc.sc_edi;
619	regs->tf_rsi    = frame.sf_sc.sc_esi;
620	regs->tf_rbp    = frame.sf_sc.sc_ebp;
621	regs->tf_rbx    = frame.sf_sc.sc_ebx;
622	regs->tf_rdx    = frame.sf_sc.sc_edx;
623	regs->tf_rcx    = frame.sf_sc.sc_ecx;
624	regs->tf_rax    = frame.sf_sc.sc_eax;
625	regs->tf_rip    = frame.sf_sc.sc_eip;
626	regs->tf_cs     = frame.sf_sc.sc_cs;
627	regs->tf_ds     = frame.sf_sc.sc_ds;
628	regs->tf_es     = frame.sf_sc.sc_es;
629	regs->tf_fs     = frame.sf_sc.sc_fs;
630	regs->tf_gs     = frame.sf_sc.sc_gs;
631	regs->tf_rflags = eflags;
632	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
633	regs->tf_ss     = frame.sf_sc.sc_ss;
634	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
635
636	return (EJUSTRETURN);
637}
638
639/*
640 * System call to cleanup state after a signal
641 * has been taken.  Reset signal mask and
642 * stack state from context left by rt_sendsig (above).
643 * Return to previous pc and psl as specified by
644 * context left by sendsig. Check carefully to
645 * make sure that the user has not modified the
646 * psl to gain improper privileges or to cause
647 * a machine fault.
648 */
649int
650linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
651{
652	struct l_ucontext uc;
653	struct l_sigcontext *context;
654	sigset_t bmask;
655	l_stack_t *lss;
656	stack_t ss;
657	struct trapframe *regs;
658	int eflags;
659	ksiginfo_t ksi;
660
661	regs = td->td_frame;
662
663#ifdef DEBUG
664	if (ldebug(rt_sigreturn))
665		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
666#endif
667	/*
668	 * The trampoline code hands us the ucontext.
669	 * It is unsafe to keep track of it ourselves, in the event that a
670	 * program jumps out of a signal handler.
671	 */
672	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
673		return (EFAULT);
674
675	context = &uc.uc_mcontext;
676
677	/*
678	 * Check for security violations.
679	 */
680#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
681	eflags = context->sc_eflags;
682	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
683		return(EINVAL);
684
685	/*
686	 * Don't allow users to load a valid privileged %cs.  Let the
687	 * hardware check for invalid selectors, excess privilege in
688	 * other selectors, invalid %eip's and invalid %esp's.
689	 */
690#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
691	if (!CS_SECURE(context->sc_cs)) {
692		ksiginfo_init_trap(&ksi);
693		ksi.ksi_signo = SIGBUS;
694		ksi.ksi_code = BUS_OBJERR;
695		ksi.ksi_trapno = T_PROTFLT;
696		ksi.ksi_addr = (void *)regs->tf_rip;
697		trapsignal(td, &ksi);
698		return(EINVAL);
699	}
700
701	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
702	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
703
704	/*
705	 * Restore signal context
706	 */
707	regs->tf_gs	= context->sc_gs;
708	regs->tf_fs	= context->sc_fs;
709	regs->tf_es	= context->sc_es;
710	regs->tf_ds	= context->sc_ds;
711	regs->tf_rdi    = context->sc_edi;
712	regs->tf_rsi    = context->sc_esi;
713	regs->tf_rbp    = context->sc_ebp;
714	regs->tf_rbx    = context->sc_ebx;
715	regs->tf_rdx    = context->sc_edx;
716	regs->tf_rcx    = context->sc_ecx;
717	regs->tf_rax    = context->sc_eax;
718	regs->tf_rip    = context->sc_eip;
719	regs->tf_cs     = context->sc_cs;
720	regs->tf_rflags = eflags;
721	regs->tf_rsp    = context->sc_esp_at_signal;
722	regs->tf_ss     = context->sc_ss;
723	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
724
725	/*
726	 * call sigaltstack & ignore results..
727	 */
728	lss = &uc.uc_stack;
729	ss.ss_sp = PTRIN(lss->ss_sp);
730	ss.ss_size = lss->ss_size;
731	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
732
733#ifdef DEBUG
734	if (ldebug(rt_sigreturn))
735		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
736		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
737#endif
738	(void)kern_sigaltstack(td, &ss, NULL);
739
740	return (EJUSTRETURN);
741}
742
743static int
744linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
745{
746	struct proc *p;
747	struct trapframe *frame;
748
749	p = td->td_proc;
750	frame = td->td_frame;
751
752	sa->args[0] = frame->tf_rbx;
753	sa->args[1] = frame->tf_rcx;
754	sa->args[2] = frame->tf_rdx;
755	sa->args[3] = frame->tf_rsi;
756	sa->args[4] = frame->tf_rdi;
757	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
758	sa->code = frame->tf_rax;
759
760	if (sa->code >= p->p_sysent->sv_size)
761		sa->callp = &p->p_sysent->sv_table[0];
762	else
763		sa->callp = &p->p_sysent->sv_table[sa->code];
764	sa->narg = sa->callp->sy_narg;
765
766	td->td_retval[0] = 0;
767	td->td_retval[1] = frame->tf_rdx;
768
769	return (0);
770}
771
772/*
773 * If a linux binary is exec'ing something, try this image activator
774 * first.  We override standard shell script execution in order to
775 * be able to modify the interpreter path.  We only do this if a linux
776 * binary is doing the exec, so we do not create an EXEC module for it.
777 */
778static int	exec_linux_imgact_try(struct image_params *iparams);
779
780static int
781exec_linux_imgact_try(struct image_params *imgp)
782{
783	const char *head = (const char *)imgp->image_header;
784	char *rpath;
785	int error = -1;
786
787	/*
788	* The interpreter for shell scripts run from a linux binary needs
789	* to be located in /compat/linux if possible in order to recursively
790	* maintain linux path emulation.
791	*/
792	if (((const short *)head)[0] == SHELLMAGIC) {
793		/*
794		* Run our normal shell image activator.  If it succeeds attempt
795		* to use the alternate path for the interpreter.  If an
796		* alternate * path is found, use our stringspace to store it.
797		*/
798		if ((error = exec_shell_imgact(imgp)) == 0) {
799			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
800			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
801			    AT_FDCWD);
802			if (rpath != NULL)
803				imgp->args->fname_buf =
804				    imgp->interpreter_name = rpath;
805		}
806	}
807	return (error);
808}
809
810/*
811 * Clear registers on exec
812 * XXX copied from ia32_signal.c.
813 */
814static void
815exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
816{
817	struct trapframe *regs = td->td_frame;
818	struct pcb *pcb = td->td_pcb;
819
820	mtx_lock(&dt_lock);
821	if (td->td_proc->p_md.md_ldt != NULL)
822		user_ldt_free(td);
823	else
824		mtx_unlock(&dt_lock);
825
826	critical_enter();
827	wrmsr(MSR_FSBASE, 0);
828	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
829	pcb->pcb_fsbase = 0;
830	pcb->pcb_gsbase = 0;
831	critical_exit();
832	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
833
834	bzero((char *)regs, sizeof(struct trapframe));
835	regs->tf_rip = imgp->entry_addr;
836	regs->tf_rsp = stack;
837	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
838	regs->tf_gs = _ugssel;
839	regs->tf_fs = _ufssel;
840	regs->tf_es = _udatasel;
841	regs->tf_ds = _udatasel;
842	regs->tf_ss = _udatasel;
843	regs->tf_flags = TF_HASSEGS;
844	regs->tf_cs = _ucode32sel;
845	regs->tf_rbx = imgp->ps_strings;
846
847	fpstate_drop(td);
848
849	/* Do full restore on return so that we can change to a different %cs */
850	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
851	td->td_retval[1] = 0;
852}
853
854/*
855 * XXX copied from ia32_sysvec.c.
856 */
857static register_t *
858linux_copyout_strings(struct image_params *imgp)
859{
860	int argc, envc;
861	u_int32_t *vectp;
862	char *stringp, *destp;
863	u_int32_t *stack_base;
864	struct linux32_ps_strings *arginfo;
865
866	/*
867	 * Calculate string base and vector table pointers.
868	 * Also deal with signal trampoline code for this exec type.
869	 */
870	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
871	destp =	(caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
872	    roundup((ARG_MAX - imgp->args->stringspace),
873	    sizeof(char *));
874
875	/*
876	 * Install LINUX_PLATFORM
877	 */
878	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
879	    linux_szplatform);
880
881	/*
882	 * If we have a valid auxargs ptr, prepare some room
883	 * on the stack.
884	 */
885	if (imgp->auxargs) {
886		/*
887		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
888		 * lower compatibility.
889		 */
890		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
891		    (LINUX_AT_COUNT * 2);
892		/*
893		 * The '+ 2' is for the null pointers at the end of each of
894		 * the arg and env vector sets,and imgp->auxarg_size is room
895		 * for argument of Runtime loader.
896		 */
897		vectp = (u_int32_t *) (destp - (imgp->args->argc +
898		    imgp->args->envc + 2 + imgp->auxarg_size) *
899		    sizeof(u_int32_t));
900
901	} else
902		/*
903		 * The '+ 2' is for the null pointers at the end of each of
904		 * the arg and env vector sets
905		 */
906		vectp = (u_int32_t *)(destp - (imgp->args->argc +
907		    imgp->args->envc + 2) * sizeof(u_int32_t));
908
909	/*
910	 * vectp also becomes our initial stack base
911	 */
912	stack_base = vectp;
913
914	stringp = imgp->args->begin_argv;
915	argc = imgp->args->argc;
916	envc = imgp->args->envc;
917	/*
918	 * Copy out strings - arguments and environment.
919	 */
920	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
921
922	/*
923	 * Fill in "ps_strings" struct for ps, w, etc.
924	 */
925	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
926	suword32(&arginfo->ps_nargvstr, argc);
927
928	/*
929	 * Fill in argument portion of vector table.
930	 */
931	for (; argc > 0; --argc) {
932		suword32(vectp++, (uint32_t)(intptr_t)destp);
933		while (*stringp++ != 0)
934			destp++;
935		destp++;
936	}
937
938	/* a null vector table pointer separates the argp's from the envp's */
939	suword32(vectp++, 0);
940
941	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
942	suword32(&arginfo->ps_nenvstr, envc);
943
944	/*
945	 * Fill in environment portion of vector table.
946	 */
947	for (; envc > 0; --envc) {
948		suword32(vectp++, (uint32_t)(intptr_t)destp);
949		while (*stringp++ != 0)
950			destp++;
951		destp++;
952	}
953
954	/* end of vector table is a null pointer */
955	suword32(vectp, 0);
956
957	return ((register_t *)stack_base);
958}
959
960static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
961    "32-bit Linux emulation");
962
963static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
964SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
965    &linux32_maxdsiz, 0, "");
966static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
967SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
968    &linux32_maxssiz, 0, "");
969static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
970SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
971    &linux32_maxvmem, 0, "");
972
973static void
974linux32_fixlimit(struct rlimit *rl, int which)
975{
976
977	switch (which) {
978	case RLIMIT_DATA:
979		if (linux32_maxdsiz != 0) {
980			if (rl->rlim_cur > linux32_maxdsiz)
981				rl->rlim_cur = linux32_maxdsiz;
982			if (rl->rlim_max > linux32_maxdsiz)
983				rl->rlim_max = linux32_maxdsiz;
984		}
985		break;
986	case RLIMIT_STACK:
987		if (linux32_maxssiz != 0) {
988			if (rl->rlim_cur > linux32_maxssiz)
989				rl->rlim_cur = linux32_maxssiz;
990			if (rl->rlim_max > linux32_maxssiz)
991				rl->rlim_max = linux32_maxssiz;
992		}
993		break;
994	case RLIMIT_VMEM:
995		if (linux32_maxvmem != 0) {
996			if (rl->rlim_cur > linux32_maxvmem)
997				rl->rlim_cur = linux32_maxvmem;
998			if (rl->rlim_max > linux32_maxvmem)
999				rl->rlim_max = linux32_maxvmem;
1000		}
1001		break;
1002	}
1003}
1004
1005struct sysentvec elf_linux_sysvec = {
1006	.sv_size	= LINUX_SYS_MAXSYSCALL,
1007	.sv_table	= linux_sysent,
1008	.sv_mask	= 0,
1009	.sv_sigsize	= LINUX_SIGTBLSZ,
1010	.sv_sigtbl	= bsd_to_linux_signal,
1011	.sv_errsize	= ELAST + 1,
1012	.sv_errtbl	= bsd_to_linux_errno,
1013	.sv_transtrap	= translate_traps,
1014	.sv_fixup	= elf_linux_fixup,
1015	.sv_sendsig	= linux_sendsig,
1016	.sv_sigcode	= linux_sigcode,
1017	.sv_szsigcode	= &linux_szsigcode,
1018	.sv_prepsyscall	= NULL,
1019	.sv_name	= "Linux ELF32",
1020	.sv_coredump	= elf32_coredump,
1021	.sv_imgact_try	= exec_linux_imgact_try,
1022	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1023	.sv_pagesize	= PAGE_SIZE,
1024	.sv_minuser	= VM_MIN_ADDRESS,
1025	.sv_maxuser	= LINUX32_MAXUSER,
1026	.sv_usrstack	= LINUX32_USRSTACK,
1027	.sv_psstrings	= LINUX32_PS_STRINGS,
1028	.sv_stackprot	= VM_PROT_ALL,
1029	.sv_copyout_strings = linux_copyout_strings,
1030	.sv_setregs	= exec_linux_setregs,
1031	.sv_fixlimit	= linux32_fixlimit,
1032	.sv_maxssiz	= &linux32_maxssiz,
1033	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1034	.sv_set_syscall_retval = cpu_set_syscall_retval,
1035	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
1036	.sv_syscallnames = NULL,
1037	.sv_shared_page_base = LINUX32_SHAREDPAGE,
1038	.sv_shared_page_len = PAGE_SIZE,
1039	.sv_schedtail	= linux_schedtail,
1040};
1041INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1042
/*
 * Constants identifying a GNU ABI tag note for Linux: the note's vendor
 * name and the value expected in the first descriptor word.  Neither is
 * ever written, so both are const-qualified.
 */
static const char GNU_ABI_VENDOR[] = "GNU";
static const int GNULINUX_ABI_DESC = 0;
1045
1046static boolean_t
1047linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1048{
1049	const Elf32_Word *desc;
1050	uintptr_t p;
1051
1052	p = (uintptr_t)(note + 1);
1053	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1054
1055	desc = (const Elf32_Word *)p;
1056	if (desc[0] != GNULINUX_ABI_DESC)
1057		return (FALSE);
1058
1059	/*
1060	 * For linux we encode osrel as follows (see linux_mib.c):
1061	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1062	 */
1063	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1064
1065	return (TRUE);
1066}
1067
1068static Elf_Brandnote linux32_brandnote = {
1069	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1070	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1071	.hdr.n_type	= 1,
1072	.vendor		= GNU_ABI_VENDOR,
1073	.flags		= BN_TRANSLATE_OSREL,
1074	.trans_osrel	= linux32_trans_osrel
1075};
1076
1077static Elf32_Brandinfo linux_brand = {
1078	.brand		= ELFOSABI_LINUX,
1079	.machine	= EM_386,
1080	.compat_3_brand	= "Linux",
1081	.emul_path	= "/compat/linux",
1082	.interp_path	= "/lib/ld-linux.so.1",
1083	.sysvec		= &elf_linux_sysvec,
1084	.interp_newpath	= NULL,
1085	.brand_note	= &linux32_brandnote,
1086	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1087};
1088
1089static Elf32_Brandinfo linux_glibc2brand = {
1090	.brand		= ELFOSABI_LINUX,
1091	.machine	= EM_386,
1092	.compat_3_brand	= "Linux",
1093	.emul_path	= "/compat/linux",
1094	.interp_path	= "/lib/ld-linux.so.2",
1095	.sysvec		= &elf_linux_sysvec,
1096	.interp_newpath	= NULL,
1097	.brand_note	= &linux32_brandnote,
1098	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1099};
1100
1101Elf32_Brandinfo *linux_brandlist[] = {
1102	&linux_brand,
1103	&linux_glibc2brand,
1104	NULL
1105};
1106
1107static int
1108linux_elf_modevent(module_t mod, int type, void *data)
1109{
1110	Elf32_Brandinfo **brandinfo;
1111	int error;
1112	struct linux_ioctl_handler **lihp;
1113	struct linux_device_handler **ldhp;
1114
1115	error = 0;
1116
1117	switch(type) {
1118	case MOD_LOAD:
1119		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1120		     ++brandinfo)
1121			if (elf32_insert_brand_entry(*brandinfo) < 0)
1122				error = EINVAL;
1123		if (error == 0) {
1124			SET_FOREACH(lihp, linux_ioctl_handler_set)
1125				linux_ioctl_register_handler(*lihp);
1126			SET_FOREACH(ldhp, linux_device_handler_set)
1127				linux_device_register_handler(*ldhp);
1128			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1129			sx_init(&emul_shared_lock, "emuldata->shared lock");
1130			LIST_INIT(&futex_list);
1131			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1132			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1133			    linux_proc_exit, NULL, 1000);
1134			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1135			    linux_proc_exec, NULL, 1000);
1136			linux_szplatform = roundup(strlen(linux_platform) + 1,
1137			    sizeof(char *));
1138			linux_osd_jail_register();
1139			stclohz = (stathz ? stathz : hz);
1140			if (bootverbose)
1141				printf("Linux ELF exec handler installed\n");
1142		} else
1143			printf("cannot insert Linux ELF brand handler\n");
1144		break;
1145	case MOD_UNLOAD:
1146		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1147		     ++brandinfo)
1148			if (elf32_brand_inuse(*brandinfo))
1149				error = EBUSY;
1150		if (error == 0) {
1151			for (brandinfo = &linux_brandlist[0];
1152			     *brandinfo != NULL; ++brandinfo)
1153				if (elf32_remove_brand_entry(*brandinfo) < 0)
1154					error = EINVAL;
1155		}
1156		if (error == 0) {
1157			SET_FOREACH(lihp, linux_ioctl_handler_set)
1158				linux_ioctl_unregister_handler(*lihp);
1159			SET_FOREACH(ldhp, linux_device_handler_set)
1160				linux_device_unregister_handler(*ldhp);
1161			mtx_destroy(&emul_lock);
1162			sx_destroy(&emul_shared_lock);
1163			mtx_destroy(&futex_mtx);
1164			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1165			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1166			linux_osd_jail_deregister();
1167			if (bootverbose)
1168				printf("Linux ELF exec handler removed\n");
1169		} else
1170			printf("Could not deinstall ELF interpreter entry\n");
1171		break;
1172	default:
1173		return EOPNOTSUPP;
1174	}
1175	return error;
1176}
1177
1178static moduledata_t linux_elf_mod = {
1179	"linuxelf",
1180	linux_elf_modevent,
1181	0
1182};
1183
1184DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1185