linux32_sysvec.c revision 191719
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer
14 *    in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_sysvec.c 191719 2009-05-01 15:36:02Z dchagin $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_IA32
38#error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39#endif
40
41#define	__ELF_WORD_SIZE	32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_mib.h>
81#include <compat/linux/linux_misc.h>
82#include <compat/linux/linux_signal.h>
83#include <compat/linux/linux_util.h>
84
/* Declare version 1 of the module named "linux". */
MODULE_VERSION(linux, 1);

/*
 * Define the M_LINUX malloc type (short name "linux", description
 * "Linux mode structures").
 */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
88
/*
 * Store one auxiliary-vector entry as two consecutive 32-bit words: first
 * `id`, then `val`, each written with suword32().  `pos` is post-incremented
 * twice, so it must be a modifiable pointer lvalue and is left pointing just
 * past the entry.  The return values of suword32() are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two characters "#!" as they appear when the first two bytes of an
 * image header are read as one 16-bit value; the constant therefore depends
 * on the host byte order.  Compared against the image header in
 * exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 *
 * These names are what ldebug(rt_sendsig) / ldebug(sendsig) in the
 * DEBUG-only code of the signal delivery functions refer to.
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
109
/*
 * Platform name string.  linux_copyout_strings() copies linux_szplatform
 * bytes of it to the user stack and elf_linux_fixup() publishes that address
 * as the LINUX_AT_PLATFORM auxiliary-vector entry.
 */
const char *linux_platform = "i686";
/*
 * Number of bytes of linux_platform copied out.  It is not assigned in the
 * code visible here -- presumably set during module initialisation; confirm.
 */
static int linux_szplatform;
/* Signal trampoline code and its size, defined outside this file. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the sysentvec callbacks implemented below. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimit(struct rlimit *rl, int which);

/* Futex list and its mutex, defined outside this file. */
extern LIST_HEAD(futex_list, futex) futex_list;
extern struct mtx futex_mtx;

/*
 * Event handler registration tags.  They are not used in the code visible
 * here -- presumably filled in when the module registers its exit,
 * schedtail and exec handlers; confirm.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
136
/*
 * Linux syscalls return negative errno's, we do positive and map them
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is indexed by the (positive) FreeBSD errno value and holds the
 * already-negated Linux value.  It is installed as sv_errtbl of the Linux
 * sysentvec with sv_errsize = ELAST + 1.  The trailing comment on each row
 * gives the FreeBSD errno range covered by that row.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
	 -72, -67, -71						/* 90 - 92 */
};
156
/*
 * FreeBSD -> Linux signal number translation.  Installed as sv_sigtbl of the
 * Linux sysentvec; the sendsig functions index it with _SIG_IDX(sig).
 * A 0 entry marks a slot with no LINUX_SIG* constant assigned -- presumably
 * a FreeBSD signal that has no Linux counterpart; confirm.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
167
/*
 * Linux -> FreeBSD signal number translation, the counterpart of
 * bsd_to_linux_signal[].  It is not referenced in the code visible here;
 * presumably it is indexed the same way (signal number via _SIG_IDX()) by
 * the shared Linux signal code -- confirm against the users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
178
/* Value reported for any FreeBSD trap number that has no Linux equivalent. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap number -> Linux trap number, indexed by the FreeBSD T_* value
 * (shown in the trailing comment of each row).
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0 */		LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT */	6,
	/*  2 */		LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT */	3,
	/*  4 */		LINUX_T_UNKNOWN,
	/*  5 */		LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP */	16,
	/*  7 T_ASTFLT */	254,
	/*  8 */		LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT */	13,
	/* 10 T_TRCTRAP */	1,
	/* 11 */		LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT */	14,
	/* 13 */		LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT */	17,
	/* 15 */		LINUX_T_UNKNOWN,
	/* 16 */		LINUX_T_UNKNOWN,
	/* 17 */		LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE */	0,
	/* 19 T_NMI */		2,
	/* 20 T_OFLOW */	4,
	/* 21 T_BOUND */	5,
	/* 22 T_DNA */		7,
	/* 23 T_DOUBLEFLT */	8,
	/* 24 T_FPOPFLT */	9,
	/* 25 T_TSSFLT */	10,
	/* 26 T_SEGNPFLT */	11,
	/* 27 T_STKFLT */	12,
	/* 28 T_MCHK */		18,
	/* 29 T_XMMFLT */	19,
	/* 30 T_RESERVED */	15
};

/* Number of entries in _bsd_to_linux_trapcode[]. */
#define	LINUX_TRAPCODE_COUNT \
    (sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]))

/*
 * Translate a FreeBSD trap number.  Anything that does not index the table
 * yields LINUX_T_UNKNOWN; a negative int fails the unsigned comparison
 * against the element count and so is rejected as well.  `code` is
 * evaluated more than once.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < LINUX_TRAPCODE_COUNT ? \
     _bsd_to_linux_trapcode[(code)] : \
     LINUX_T_UNKNOWN)
217
/*
 * Argument/environment descriptor located at LINUX32_PS_STRINGS and filled
 * in with suword32() by linux_copyout_strings().  The two "str" members hold
 * 32-bit user addresses rather than native pointers.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
224
225/*
226 * If FreeBSD & Linux have a difference of opinion about what a trap
227 * means, deal with it here.
228 *
229 * MPSAFE
230 */
231static int
232translate_traps(int signal, int trap_code)
233{
234	if (signal != SIGBUS)
235		return signal;
236	switch (trap_code) {
237	case T_PROTFLT:
238	case T_TSSFLT:
239	case T_DOUBLEFLT:
240	case T_PAGEFLT:
241		return SIGSEGV;
242	default:
243		return signal;
244	}
245}
246
247static int
248elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
249{
250	Elf32_Auxargs *args;
251	Elf32_Addr *base;
252	Elf32_Addr *pos, *uplatform;
253	struct linux32_ps_strings *arginfo;
254
255	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
256	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
257	    linux_szplatform);
258
259	KASSERT(curthread->td_proc == imgp->proc,
260	    ("unsafe elf_linux_fixup(), should be curproc"));
261	base = (Elf32_Addr *)*stack_base;
262	args = (Elf32_Auxargs *)imgp->auxargs;
263	pos = base + (imgp->args->argc + imgp->args->envc + 2);
264
265	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
266	AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, hz);
267	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
268	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
269	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
270	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
271	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
272	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
273	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
274	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
275	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280	if (args->execfd != -1)
281		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
282	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
283
284	free(imgp->auxargs, M_TEMP);
285	imgp->auxargs = NULL;
286
287	base--;
288	suword32(base, (uint32_t)imgp->args->argc);
289	*stack_base = (register_t *)base;
290	return 0;
291}
292
293extern unsigned long linux_sznonrtsigcode;
294
295static void
296linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
297{
298	struct thread *td = curthread;
299	struct proc *p = td->td_proc;
300	struct sigacts *psp;
301	struct trapframe *regs;
302	struct l_rt_sigframe *fp, frame;
303	int oonstack;
304	int sig;
305	int code;
306
307	sig = ksi->ksi_signo;
308	code = ksi->ksi_code;
309	PROC_LOCK_ASSERT(p, MA_OWNED);
310	psp = p->p_sigacts;
311	mtx_assert(&psp->ps_mtx, MA_OWNED);
312	regs = td->td_frame;
313	oonstack = sigonstack(regs->tf_rsp);
314
315#ifdef DEBUG
316	if (ldebug(rt_sendsig))
317		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
318		    catcher, sig, (void*)mask, code);
319#endif
320	/*
321	 * Allocate space for the signal handler context.
322	 */
323	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
324	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
325		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
326		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
327	} else
328		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
329	mtx_unlock(&psp->ps_mtx);
330
331	/*
332	 * Build the argument list for the signal handler.
333	 */
334	if (p->p_sysent->sv_sigtbl)
335		if (sig <= p->p_sysent->sv_sigsize)
336			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
337
338	bzero(&frame, sizeof(frame));
339
340	frame.sf_handler = PTROUT(catcher);
341	frame.sf_sig = sig;
342	frame.sf_siginfo = PTROUT(&fp->sf_si);
343	frame.sf_ucontext = PTROUT(&fp->sf_sc);
344
345	/* Fill in POSIX parts */
346	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
347
348	/*
349	 * Build the signal context to be used by sigreturn.
350	 */
351	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
352	frame.sf_sc.uc_link = 0;		/* XXX ??? */
353
354	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
355	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
356	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
357	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
358	PROC_UNLOCK(p);
359
360	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
361
362	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
363	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
364	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
365	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
366	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
367	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
368	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
369	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
370	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
371	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
372	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
373	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
374	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
375	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
376	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
377	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
378	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
379	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
380	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
381	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
382
383#ifdef DEBUG
384	if (ldebug(rt_sendsig))
385		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
386		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
387		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
388#endif
389
390	if (copyout(&frame, fp, sizeof(frame)) != 0) {
391		/*
392		 * Process has trashed its stack; give it an illegal
393		 * instruction to halt it in its tracks.
394		 */
395#ifdef DEBUG
396		if (ldebug(rt_sendsig))
397			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
398			    fp, oonstack);
399#endif
400		PROC_LOCK(p);
401		sigexit(td, SIGILL);
402	}
403
404	/*
405	 * Build context to run handler in.
406	 */
407	regs->tf_rsp = PTROUT(fp);
408	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
409	    linux_sznonrtsigcode;
410	regs->tf_rflags &= ~(PSL_T | PSL_D);
411	regs->tf_cs = _ucode32sel;
412	regs->tf_ss = _udatasel;
413	regs->tf_ds = _udatasel;
414	regs->tf_es = _udatasel;
415	regs->tf_fs = _ufssel;
416	regs->tf_gs = _ugssel;
417	regs->tf_flags = TF_HASSEGS;
418	PROC_LOCK(p);
419	mtx_lock(&psp->ps_mtx);
420}
421
422
423/*
424 * Send an interrupt to process.
425 *
426 * Stack is set up to allow sigcode stored
427 * in u. to call routine, followed by kcall
428 * to sigreturn routine below.  After sigreturn
429 * resets the signal mask, the stack, and the
430 * frame pointer, it returns to the user
431 * specified pc, psl.
432 */
433static void
434linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
435{
436	struct thread *td = curthread;
437	struct proc *p = td->td_proc;
438	struct sigacts *psp;
439	struct trapframe *regs;
440	struct l_sigframe *fp, frame;
441	l_sigset_t lmask;
442	int oonstack, i;
443	int sig, code;
444
445	sig = ksi->ksi_signo;
446	code = ksi->ksi_code;
447	PROC_LOCK_ASSERT(p, MA_OWNED);
448	psp = p->p_sigacts;
449	mtx_assert(&psp->ps_mtx, MA_OWNED);
450	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
451		/* Signal handler installed with SA_SIGINFO. */
452		linux_rt_sendsig(catcher, ksi, mask);
453		return;
454	}
455
456	regs = td->td_frame;
457	oonstack = sigonstack(regs->tf_rsp);
458
459#ifdef DEBUG
460	if (ldebug(sendsig))
461		printf(ARGS(sendsig, "%p, %d, %p, %u"),
462		    catcher, sig, (void*)mask, code);
463#endif
464
465	/*
466	 * Allocate space for the signal handler context.
467	 */
468	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
469	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
470		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
471		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
472	} else
473		fp = (struct l_sigframe *)regs->tf_rsp - 1;
474	mtx_unlock(&psp->ps_mtx);
475	PROC_UNLOCK(p);
476
477	/*
478	 * Build the argument list for the signal handler.
479	 */
480	if (p->p_sysent->sv_sigtbl)
481		if (sig <= p->p_sysent->sv_sigsize)
482			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
483
484	bzero(&frame, sizeof(frame));
485
486	frame.sf_handler = PTROUT(catcher);
487	frame.sf_sig = sig;
488
489	bsd_to_linux_sigset(mask, &lmask);
490
491	/*
492	 * Build the signal context to be used by sigreturn.
493	 */
494	frame.sf_sc.sc_mask   = lmask.__bits[0];
495	frame.sf_sc.sc_gs     = regs->tf_gs;
496	frame.sf_sc.sc_fs     = regs->tf_fs;
497	frame.sf_sc.sc_es     = regs->tf_es;
498	frame.sf_sc.sc_ds     = regs->tf_ds;
499	frame.sf_sc.sc_edi    = regs->tf_rdi;
500	frame.sf_sc.sc_esi    = regs->tf_rsi;
501	frame.sf_sc.sc_ebp    = regs->tf_rbp;
502	frame.sf_sc.sc_ebx    = regs->tf_rbx;
503	frame.sf_sc.sc_edx    = regs->tf_rdx;
504	frame.sf_sc.sc_ecx    = regs->tf_rcx;
505	frame.sf_sc.sc_eax    = regs->tf_rax;
506	frame.sf_sc.sc_eip    = regs->tf_rip;
507	frame.sf_sc.sc_cs     = regs->tf_cs;
508	frame.sf_sc.sc_eflags = regs->tf_rflags;
509	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
510	frame.sf_sc.sc_ss     = regs->tf_ss;
511	frame.sf_sc.sc_err    = regs->tf_err;
512	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
513	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
514
515	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
516		frame.sf_extramask[i] = lmask.__bits[i+1];
517
518	if (copyout(&frame, fp, sizeof(frame)) != 0) {
519		/*
520		 * Process has trashed its stack; give it an illegal
521		 * instruction to halt it in its tracks.
522		 */
523		PROC_LOCK(p);
524		sigexit(td, SIGILL);
525	}
526
527	/*
528	 * Build context to run handler in.
529	 */
530	regs->tf_rsp = PTROUT(fp);
531	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
532	regs->tf_rflags &= ~(PSL_T | PSL_D);
533	regs->tf_cs = _ucode32sel;
534	regs->tf_ss = _udatasel;
535	regs->tf_ds = _udatasel;
536	regs->tf_es = _udatasel;
537	regs->tf_fs = _ufssel;
538	regs->tf_gs = _ugssel;
539	regs->tf_flags = TF_HASSEGS;
540	PROC_LOCK(p);
541	mtx_lock(&psp->ps_mtx);
542}
543
544/*
545 * System call to cleanup state after a signal
546 * has been taken.  Reset signal mask and
547 * stack state from context left by sendsig (above).
548 * Return to previous pc and psl as specified by
549 * context left by sendsig. Check carefully to
550 * make sure that the user has not modified the
551 * psl to gain improper privileges or to cause
552 * a machine fault.
553 */
554int
555linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
556{
557	struct proc *p = td->td_proc;
558	struct l_sigframe frame;
559	struct trapframe *regs;
560	l_sigset_t lmask;
561	int eflags, i;
562	ksiginfo_t ksi;
563
564	regs = td->td_frame;
565
566#ifdef DEBUG
567	if (ldebug(sigreturn))
568		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
569#endif
570	/*
571	 * The trampoline code hands us the sigframe.
572	 * It is unsafe to keep track of it ourselves, in the event that a
573	 * program jumps out of a signal handler.
574	 */
575	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
576		return (EFAULT);
577
578	/*
579	 * Check for security violations.
580	 */
581#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
582	eflags = frame.sf_sc.sc_eflags;
583	/*
584	 * XXX do allow users to change the privileged flag PSL_RF.  The
585	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
586	 * sometimes set it there too.  tf_eflags is kept in the signal
587	 * context during signal handling and there is no other place
588	 * to remember it, so the PSL_RF bit may be corrupted by the
589	 * signal handler without us knowing.  Corruption of the PSL_RF
590	 * bit at worst causes one more or one less debugger trap, so
591	 * allowing it is fairly harmless.
592	 */
593	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
594		return(EINVAL);
595
596	/*
597	 * Don't allow users to load a valid privileged %cs.  Let the
598	 * hardware check for invalid selectors, excess privilege in
599	 * other selectors, invalid %eip's and invalid %esp's.
600	 */
601#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
602	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
603		ksiginfo_init_trap(&ksi);
604		ksi.ksi_signo = SIGBUS;
605		ksi.ksi_code = BUS_OBJERR;
606		ksi.ksi_trapno = T_PROTFLT;
607		ksi.ksi_addr = (void *)regs->tf_rip;
608		trapsignal(td, &ksi);
609		return(EINVAL);
610	}
611
612	lmask.__bits[0] = frame.sf_sc.sc_mask;
613	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
614		lmask.__bits[i+1] = frame.sf_extramask[i];
615	PROC_LOCK(p);
616	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
617	SIG_CANTMASK(td->td_sigmask);
618	signotify(td);
619	PROC_UNLOCK(p);
620
621	/*
622	 * Restore signal context.
623	 */
624	regs->tf_rdi    = frame.sf_sc.sc_edi;
625	regs->tf_rsi    = frame.sf_sc.sc_esi;
626	regs->tf_rbp    = frame.sf_sc.sc_ebp;
627	regs->tf_rbx    = frame.sf_sc.sc_ebx;
628	regs->tf_rdx    = frame.sf_sc.sc_edx;
629	regs->tf_rcx    = frame.sf_sc.sc_ecx;
630	regs->tf_rax    = frame.sf_sc.sc_eax;
631	regs->tf_rip    = frame.sf_sc.sc_eip;
632	regs->tf_cs     = frame.sf_sc.sc_cs;
633	regs->tf_ds     = frame.sf_sc.sc_ds;
634	regs->tf_es     = frame.sf_sc.sc_es;
635	regs->tf_fs     = frame.sf_sc.sc_fs;
636	regs->tf_gs     = frame.sf_sc.sc_gs;
637	regs->tf_rflags = eflags;
638	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
639	regs->tf_ss     = frame.sf_sc.sc_ss;
640
641	return (EJUSTRETURN);
642}
643
644/*
645 * System call to cleanup state after a signal
646 * has been taken.  Reset signal mask and
647 * stack state from context left by rt_sendsig (above).
648 * Return to previous pc and psl as specified by
649 * context left by sendsig. Check carefully to
650 * make sure that the user has not modified the
651 * psl to gain improper privileges or to cause
652 * a machine fault.
653 */
654int
655linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
656{
657	struct proc *p = td->td_proc;
658	struct l_ucontext uc;
659	struct l_sigcontext *context;
660	l_stack_t *lss;
661	stack_t ss;
662	struct trapframe *regs;
663	int eflags;
664	ksiginfo_t ksi;
665
666	regs = td->td_frame;
667
668#ifdef DEBUG
669	if (ldebug(rt_sigreturn))
670		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
671#endif
672	/*
673	 * The trampoline code hands us the ucontext.
674	 * It is unsafe to keep track of it ourselves, in the event that a
675	 * program jumps out of a signal handler.
676	 */
677	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
678		return (EFAULT);
679
680	context = &uc.uc_mcontext;
681
682	/*
683	 * Check for security violations.
684	 */
685#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686	eflags = context->sc_eflags;
687	/*
688	 * XXX do allow users to change the privileged flag PSL_RF.  The
689	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690	 * sometimes set it there too.  tf_eflags is kept in the signal
691	 * context during signal handling and there is no other place
692	 * to remember it, so the PSL_RF bit may be corrupted by the
693	 * signal handler without us knowing.  Corruption of the PSL_RF
694	 * bit at worst causes one more or one less debugger trap, so
695	 * allowing it is fairly harmless.
696	 */
697	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
698		return(EINVAL);
699
700	/*
701	 * Don't allow users to load a valid privileged %cs.  Let the
702	 * hardware check for invalid selectors, excess privilege in
703	 * other selectors, invalid %eip's and invalid %esp's.
704	 */
705#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706	if (!CS_SECURE(context->sc_cs)) {
707		ksiginfo_init_trap(&ksi);
708		ksi.ksi_signo = SIGBUS;
709		ksi.ksi_code = BUS_OBJERR;
710		ksi.ksi_trapno = T_PROTFLT;
711		ksi.ksi_addr = (void *)regs->tf_rip;
712		trapsignal(td, &ksi);
713		return(EINVAL);
714	}
715
716	PROC_LOCK(p);
717	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
718	SIG_CANTMASK(td->td_sigmask);
719	signotify(td);
720	PROC_UNLOCK(p);
721
722	/*
723	 * Restore signal context
724	 */
725	regs->tf_gs	= context->sc_gs;
726	regs->tf_fs	= context->sc_fs;
727	regs->tf_es	= context->sc_es;
728	regs->tf_ds	= context->sc_ds;
729	regs->tf_rdi    = context->sc_edi;
730	regs->tf_rsi    = context->sc_esi;
731	regs->tf_rbp    = context->sc_ebp;
732	regs->tf_rbx    = context->sc_ebx;
733	regs->tf_rdx    = context->sc_edx;
734	regs->tf_rcx    = context->sc_ecx;
735	regs->tf_rax    = context->sc_eax;
736	regs->tf_rip    = context->sc_eip;
737	regs->tf_cs     = context->sc_cs;
738	regs->tf_rflags = eflags;
739	regs->tf_rsp    = context->sc_esp_at_signal;
740	regs->tf_ss     = context->sc_ss;
741
742	/*
743	 * call sigaltstack & ignore results..
744	 */
745	lss = &uc.uc_stack;
746	ss.ss_sp = PTRIN(lss->ss_sp);
747	ss.ss_size = lss->ss_size;
748	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
749
750#ifdef DEBUG
751	if (ldebug(rt_sigreturn))
752		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
753		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
754#endif
755	(void)kern_sigaltstack(td, &ss, NULL);
756
757	return (EJUSTRETURN);
758}
759
760/*
761 * MPSAFE
762 */
763static void
764linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
765{
766	args[0] = tf->tf_rbx;
767	args[1] = tf->tf_rcx;
768	args[2] = tf->tf_rdx;
769	args[3] = tf->tf_rsi;
770	args[4] = tf->tf_rdi;
771	args[5] = tf->tf_rbp;	/* Unconfirmed */
772	*params = NULL;		/* no copyin */
773}
774
775/*
776 * If a linux binary is exec'ing something, try this image activator
777 * first.  We override standard shell script execution in order to
778 * be able to modify the interpreter path.  We only do this if a linux
779 * binary is doing the exec, so we do not create an EXEC module for it.
780 */
781static int	exec_linux_imgact_try(struct image_params *iparams);
782
783static int
784exec_linux_imgact_try(struct image_params *imgp)
785{
786	const char *head = (const char *)imgp->image_header;
787	char *rpath;
788	int error = -1, len;
789
790	/*
791	* The interpreter for shell scripts run from a linux binary needs
792	* to be located in /compat/linux if possible in order to recursively
793	* maintain linux path emulation.
794	*/
795	if (((const short *)head)[0] == SHELLMAGIC) {
796		/*
797		* Run our normal shell image activator.  If it succeeds attempt
798		* to use the alternate path for the interpreter.  If an
799		* alternate * path is found, use our stringspace to store it.
800		*/
801		if ((error = exec_shell_imgact(imgp)) == 0) {
802			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
803			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
804			    AT_FDCWD);
805			if (rpath != NULL) {
806				len = strlen(rpath) + 1;
807
808				if (len <= MAXSHELLCMDLEN) {
809					memcpy(imgp->interpreter_name, rpath,
810					    len);
811				}
812				free(rpath, M_TEMP);
813			}
814		}
815	}
816	return(error);
817}
818
819/*
820 * Clear registers on exec
821 * XXX copied from ia32_signal.c.
822 */
823static void
824exec_linux_setregs(td, entry, stack, ps_strings)
825	struct thread *td;
826	u_long entry;
827	u_long stack;
828	u_long ps_strings;
829{
830	struct trapframe *regs = td->td_frame;
831	struct pcb *pcb = td->td_pcb;
832
833	mtx_lock(&dt_lock);
834	if (td->td_proc->p_md.md_ldt != NULL)
835		user_ldt_free(td);
836	else
837		mtx_unlock(&dt_lock);
838
839	critical_enter();
840	wrmsr(MSR_FSBASE, 0);
841	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
842	pcb->pcb_fsbase = 0;
843	pcb->pcb_gsbase = 0;
844	critical_exit();
845	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
846
847	bzero((char *)regs, sizeof(struct trapframe));
848	regs->tf_rip = entry;
849	regs->tf_rsp = stack;
850	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
851	regs->tf_gs = _ugssel;
852	regs->tf_fs = _ufssel;
853	regs->tf_es = _udatasel;
854	regs->tf_ds = _udatasel;
855	regs->tf_ss = _udatasel;
856	regs->tf_flags = TF_HASSEGS;
857	regs->tf_cs = _ucode32sel;
858	regs->tf_rbx = ps_strings;
859	load_cr0(rcr0() | CR0_MP | CR0_TS);
860	fpstate_drop(td);
861
862	/* Return via doreti so that we can change to a different %cs */
863	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
864	pcb->pcb_flags &= ~PCB_GS32BIT;
865	td->td_retval[1] = 0;
866}
867
868/*
869 * XXX copied from ia32_sysvec.c.
870 */
871static register_t *
872linux_copyout_strings(struct image_params *imgp)
873{
874	int argc, envc;
875	u_int32_t *vectp;
876	char *stringp, *destp;
877	u_int32_t *stack_base;
878	struct linux32_ps_strings *arginfo;
879
880	/*
881	 * Calculate string base and vector table pointers.
882	 * Also deal with signal trampoline code for this exec type.
883	 */
884	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
885	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
886	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
887	    sizeof(char *));
888
889	/*
890	 * install sigcode
891	 */
892	copyout(imgp->proc->p_sysent->sv_sigcode,
893	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
894
895	/*
896	 * Install LINUX_PLATFORM
897	 */
898	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
899	    linux_szplatform), linux_szplatform);
900
901	/*
902	 * If we have a valid auxargs ptr, prepare some room
903	 * on the stack.
904	 */
905	if (imgp->auxargs) {
906		/*
907		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
908		 * lower compatibility.
909		 */
910		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
911		    (LINUX_AT_COUNT * 2);
912		/*
913		 * The '+ 2' is for the null pointers at the end of each of
914		 * the arg and env vector sets,and imgp->auxarg_size is room
915		 * for argument of Runtime loader.
916		 */
917		vectp = (u_int32_t *) (destp - (imgp->args->argc +
918		    imgp->args->envc + 2 + imgp->auxarg_size) *
919		    sizeof(u_int32_t));
920
921	} else
922		/*
923		 * The '+ 2' is for the null pointers at the end of each of
924		 * the arg and env vector sets
925		 */
926		vectp = (u_int32_t *)(destp - (imgp->args->argc +
927		    imgp->args->envc + 2) * sizeof(u_int32_t));
928
929	/*
930	 * vectp also becomes our initial stack base
931	 */
932	stack_base = vectp;
933
934	stringp = imgp->args->begin_argv;
935	argc = imgp->args->argc;
936	envc = imgp->args->envc;
937	/*
938	 * Copy out strings - arguments and environment.
939	 */
940	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
941
942	/*
943	 * Fill in "ps_strings" struct for ps, w, etc.
944	 */
945	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
946	suword32(&arginfo->ps_nargvstr, argc);
947
948	/*
949	 * Fill in argument portion of vector table.
950	 */
951	for (; argc > 0; --argc) {
952		suword32(vectp++, (uint32_t)(intptr_t)destp);
953		while (*stringp++ != 0)
954			destp++;
955		destp++;
956	}
957
958	/* a null vector table pointer separates the argp's from the envp's */
959	suword32(vectp++, 0);
960
961	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
962	suword32(&arginfo->ps_nenvstr, envc);
963
964	/*
965	 * Fill in environment portion of vector table.
966	 */
967	for (; envc > 0; --envc) {
968		suword32(vectp++, (uint32_t)(intptr_t)destp);
969		while (*stringp++ != 0)
970			destp++;
971		destp++;
972	}
973
974	/* end of vector table is a null pointer */
975	suword32(vectp, 0);
976
977	return ((register_t *)stack_base);
978}
979
/* Sysctl node "linux32" under "compat", holding the knobs below. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read-write ceilings applied by linux32_fixlimit() to RLIMIT_DATA,
 * RLIMIT_STACK and RLIMIT_VMEM respectively.  A value of 0 disables the
 * corresponding ceiling.  linux32_maxssiz is also exported through
 * sv_maxssiz of the Linux sysentvec.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
992
993static void
994linux32_fixlimit(struct rlimit *rl, int which)
995{
996
997	switch (which) {
998	case RLIMIT_DATA:
999		if (linux32_maxdsiz != 0) {
1000			if (rl->rlim_cur > linux32_maxdsiz)
1001				rl->rlim_cur = linux32_maxdsiz;
1002			if (rl->rlim_max > linux32_maxdsiz)
1003				rl->rlim_max = linux32_maxdsiz;
1004		}
1005		break;
1006	case RLIMIT_STACK:
1007		if (linux32_maxssiz != 0) {
1008			if (rl->rlim_cur > linux32_maxssiz)
1009				rl->rlim_cur = linux32_maxssiz;
1010			if (rl->rlim_max > linux32_maxssiz)
1011				rl->rlim_max = linux32_maxssiz;
1012		}
1013		break;
1014	case RLIMIT_VMEM:
1015		if (linux32_maxvmem != 0) {
1016			if (rl->rlim_cur > linux32_maxvmem)
1017				rl->rlim_cur = linux32_maxvmem;
1018			if (rl->rlim_max > linux32_maxvmem)
1019				rl->rlim_max = linux32_maxvmem;
1020		}
1021		break;
1022	}
1023}
1024
1025struct sysentvec elf_linux_sysvec = {
1026	.sv_size	= LINUX_SYS_MAXSYSCALL,
1027	.sv_table	= linux_sysent,
1028	.sv_mask	= 0,
1029	.sv_sigsize	= LINUX_SIGTBLSZ,
1030	.sv_sigtbl	= bsd_to_linux_signal,
1031	.sv_errsize	= ELAST + 1,
1032	.sv_errtbl	= bsd_to_linux_errno,
1033	.sv_transtrap	= translate_traps,
1034	.sv_fixup	= elf_linux_fixup,
1035	.sv_sendsig	= linux_sendsig,
1036	.sv_sigcode	= linux_sigcode,
1037	.sv_szsigcode	= &linux_szsigcode,
1038	.sv_prepsyscall	= linux_prepsyscall,
1039	.sv_name	= "Linux ELF32",
1040	.sv_coredump	= elf32_coredump,
1041	.sv_imgact_try	= exec_linux_imgact_try,
1042	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1043	.sv_pagesize	= PAGE_SIZE,
1044	.sv_minuser	= VM_MIN_ADDRESS,
1045	.sv_maxuser	= LINUX32_USRSTACK,
1046	.sv_usrstack	= LINUX32_USRSTACK,
1047	.sv_psstrings	= LINUX32_PS_STRINGS,
1048	.sv_stackprot	= VM_PROT_ALL,
1049	.sv_copyout_strings = linux_copyout_strings,
1050	.sv_setregs	= exec_linux_setregs,
1051	.sv_fixlimit	= linux32_fixlimit,
1052	.sv_maxssiz	= &linux32_maxssiz,
1053	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1054};
1055
1056static char GNULINUX_ABI_VENDOR[] = "GNU";
1057
1058static Elf_Brandnote linux32_brandnote = {
1059	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1060	.hdr.n_descsz	= 16,
1061	.hdr.n_type	= 1,
1062	.vendor		= GNULINUX_ABI_VENDOR,
1063	.flags		= 0
1064};
1065
1066static Elf32_Brandinfo linux_brand = {
1067	.brand		= ELFOSABI_LINUX,
1068	.machine	= EM_386,
1069	.compat_3_brand	= "Linux",
1070	.emul_path	= "/compat/linux",
1071	.interp_path	= "/lib/ld-linux.so.1",
1072	.sysvec		= &elf_linux_sysvec,
1073	.interp_newpath	= NULL,
1074	.brand_note	= &linux32_brandnote,
1075	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1076};
1077
1078static Elf32_Brandinfo linux_glibc2brand = {
1079	.brand		= ELFOSABI_LINUX,
1080	.machine	= EM_386,
1081	.compat_3_brand	= "Linux",
1082	.emul_path	= "/compat/linux",
1083	.interp_path	= "/lib/ld-linux.so.2",
1084	.sysvec		= &elf_linux_sysvec,
1085	.interp_newpath	= NULL,
1086	.brand_note	= &linux32_brandnote,
1087	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1088};
1089
1090Elf32_Brandinfo *linux_brandlist[] = {
1091	&linux_brand,
1092	&linux_glibc2brand,
1093	NULL
1094};
1095
1096static int
1097linux_elf_modevent(module_t mod, int type, void *data)
1098{
1099	Elf32_Brandinfo **brandinfo;
1100	int error;
1101	struct linux_ioctl_handler **lihp;
1102	struct linux_device_handler **ldhp;
1103
1104	error = 0;
1105
1106	switch(type) {
1107	case MOD_LOAD:
1108		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1109		     ++brandinfo)
1110			if (elf32_insert_brand_entry(*brandinfo) < 0)
1111				error = EINVAL;
1112		if (error == 0) {
1113			SET_FOREACH(lihp, linux_ioctl_handler_set)
1114				linux_ioctl_register_handler(*lihp);
1115			SET_FOREACH(ldhp, linux_device_handler_set)
1116				linux_device_register_handler(*ldhp);
1117			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1118			sx_init(&emul_shared_lock, "emuldata->shared lock");
1119			LIST_INIT(&futex_list);
1120			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1121			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1122			    linux_proc_exit, NULL, 1000);
1123			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1124			    linux_schedtail, NULL, 1000);
1125			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1126			    linux_proc_exec, NULL, 1000);
1127			linux_szplatform = roundup(strlen(linux_platform) + 1,
1128			    sizeof(char *));
1129			if (bootverbose)
1130				printf("Linux ELF exec handler installed\n");
1131		} else
1132			printf("cannot insert Linux ELF brand handler\n");
1133		break;
1134	case MOD_UNLOAD:
1135		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1136		     ++brandinfo)
1137			if (elf32_brand_inuse(*brandinfo))
1138				error = EBUSY;
1139		if (error == 0) {
1140			for (brandinfo = &linux_brandlist[0];
1141			     *brandinfo != NULL; ++brandinfo)
1142				if (elf32_remove_brand_entry(*brandinfo) < 0)
1143					error = EINVAL;
1144		}
1145		if (error == 0) {
1146			SET_FOREACH(lihp, linux_ioctl_handler_set)
1147				linux_ioctl_unregister_handler(*lihp);
1148			SET_FOREACH(ldhp, linux_device_handler_set)
1149				linux_device_unregister_handler(*ldhp);
1150			mtx_destroy(&emul_lock);
1151			sx_destroy(&emul_shared_lock);
1152			mtx_destroy(&futex_mtx);
1153			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1154			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1155			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1156			if (bootverbose)
1157				printf("Linux ELF exec handler removed\n");
1158		} else
1159			printf("Could not deinstall ELF interpreter entry\n");
1160		break;
1161	default:
1162		return EOPNOTSUPP;
1163	}
1164	return error;
1165}
1166
1167static moduledata_t linux_elf_mod = {
1168	"linuxelf",
1169	linux_elf_modevent,
1170	0
1171};
1172
1173DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1174