linux_sysvec.c revision 219405
1/*-
 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 *    derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 219405 2011-03-08 19:01:45Z dchagin $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysent.h>
48#include <sys/sysproto.h>
49#include <sys/vnode.h>
50#include <sys/eventhandler.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_object.h>
57#include <vm/vm_page.h>
58#include <vm/vm_param.h>
59
60#include <machine/cpu.h>
61#include <machine/cputypes.h>
62#include <machine/md_var.h>
63#include <machine/pcb.h>
64
65#include <i386/linux/linux.h>
66#include <i386/linux/linux_proto.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_mib.h>
70#include <compat/linux/linux_misc.h>
71#include <compat/linux/linux_signal.h>
72#include <compat/linux/linux_util.h>
73
74MODULE_VERSION(linux, 1);
75
76MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77
78#if BYTE_ORDER == LITTLE_ENDIAN
79#define SHELLMAGIC      0x2123 /* #! */
80#else
81#define SHELLMAGIC      0x2321
82#endif
83
/*
 * The sendsig routines are not system calls, but they still want to use
 * the ldebug() facility.  Map both of them to syscall 0, which is
 * slightly less bogus than borrowing ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
92
93extern char linux_sigcode[];
94extern int linux_szsigcode;
95
96extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97
98SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100
101static int	linux_fixup(register_t **stack_base,
102		    struct image_params *iparams);
103static int	elf_linux_fixup(register_t **stack_base,
104		    struct image_params *iparams);
105static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
106static void	exec_linux_setregs(struct thread *td,
107		    struct image_params *imgp, u_long stack);
108static register_t *linux_copyout_strings(struct image_params *imgp);
109static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
110
111static int linux_szplatform;
112const char *linux_platform;
113
114static eventhandler_tag linux_exit_tag;
115static eventhandler_tag linux_exec_tag;
116
117/*
118 * Linux syscalls return negative errno's, we do positive and map them
119 * Reference:
120 *   FreeBSD: src/sys/sys/errno.h
121 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
122 *            linux-2.6.17.8/include/asm-generic/errno.h
123 */
124static int bsd_to_linux_errno[ELAST + 1] = {
125	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
126	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
127	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
128	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
129	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
130	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
131	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
132	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
133	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
134	 -72, -67, -71
135};
136
137int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
138	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
139	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
140	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
141	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
142	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
143	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
144	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
145	0, LINUX_SIGUSR1, LINUX_SIGUSR2
146};
147
148int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
149	SIGHUP, SIGINT, SIGQUIT, SIGILL,
150	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
151	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
152	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
153	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
154	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
155	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
156	SIGIO, SIGURG, SIGSYS
157};
158
/* Value reported for any FreeBSD trap that has no entry in the table. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap number -> Linux trap number, indexed by the FreeBSD T_*
 * value named next to each populated slot.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0 */		LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT */	6,
	/*  2 */		LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT */	3,
	/*  4 */		LINUX_T_UNKNOWN,
	/*  5 */		LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP */	16,
	/*  7 T_ASTFLT */	254,
	/*  8 */		LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT */	13,
	/* 10 T_TRCTRAP */	1,
	/* 11 */		LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT */	14,
	/* 13 */		LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT */	17,
	/* 15 */		LINUX_T_UNKNOWN,
	/* 16 */		LINUX_T_UNKNOWN,
	/* 17 */		LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE */	0,
	/* 19 T_NMI */		2,
	/* 20 T_OFLOW */	4,
	/* 21 T_BOUND */	5,
	/* 22 T_DNA */		7,
	/* 23 T_DOUBLEFLT */	8,
	/* 24 T_FPOPFLT */	9,
	/* 25 T_TSSFLT */	10,
	/* 26 T_SEGNPFLT */	11,
	/* 27 T_STKFLT */	12,
	/* 28 T_MCHK */		18,
	/* 29 T_XMMFLT */	19,
	/* 30 T_RESERVED */	15
};

/*
 * Bounds-checked table lookup: any code at or beyond the end of the
 * table yields LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code)					\
    ((code) < sizeof(_bsd_to_linux_trapcode) /				\
	sizeof(_bsd_to_linux_trapcode[0]) ?				\
	_bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
197
198/*
199 * If FreeBSD & Linux have a difference of opinion about what a trap
200 * means, deal with it here.
201 *
202 * MPSAFE
203 */
204static int
205translate_traps(int signal, int trap_code)
206{
207	if (signal != SIGBUS)
208		return signal;
209	switch (trap_code) {
210	case T_PROTFLT:
211	case T_TSSFLT:
212	case T_DOUBLEFLT:
213	case T_PAGEFLT:
214		return SIGSEGV;
215	default:
216		return signal;
217	}
218}
219
220static int
221linux_fixup(register_t **stack_base, struct image_params *imgp)
222{
223	register_t *argv, *envp;
224
225	argv = *stack_base;
226	envp = *stack_base + (imgp->args->argc + 1);
227	(*stack_base)--;
228	**stack_base = (intptr_t)(void *)envp;
229	(*stack_base)--;
230	**stack_base = (intptr_t)(void *)argv;
231	(*stack_base)--;
232	**stack_base = imgp->args->argc;
233	return (0);
234}
235
236static int
237elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
238{
239	struct proc *p;
240	Elf32_Auxargs *args;
241	Elf32_Addr *uplatform;
242	struct ps_strings *arginfo;
243	register_t *pos;
244
245	KASSERT(curthread->td_proc == imgp->proc,
246	    ("unsafe elf_linux_fixup(), should be curproc"));
247
248	p = imgp->proc;
249	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
250	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
251	    linux_szplatform);
252	args = (Elf32_Auxargs *)imgp->auxargs;
253	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
254
255	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
256
257	/*
258	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
259	 * as it has appeared in the 2.4.0-rc7 first time.
260	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
261	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
262	 * is not present.
263	 * Also see linux_times() implementation.
264	 */
265	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
266		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
267	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
268	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
269	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
270	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
271	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
272	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
273	AUXARGS_ENTRY(pos, AT_BASE, args->base);
274	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
275	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280	if (args->execfd != -1)
281		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
282	AUXARGS_ENTRY(pos, AT_NULL, 0);
283
284	free(imgp->auxargs, M_TEMP);
285	imgp->auxargs = NULL;
286
287	(*stack_base)--;
288	**stack_base = (register_t)imgp->args->argc;
289	return (0);
290}
291
292/*
293 * Copied from kern/kern_exec.c
294 */
295static register_t *
296linux_copyout_strings(struct image_params *imgp)
297{
298	int argc, envc;
299	char **vectp;
300	char *stringp, *destp;
301	register_t *stack_base;
302	struct ps_strings *arginfo;
303	struct proc *p;
304
305	/*
306	 * Calculate string base and vector table pointers.
307	 * Also deal with signal trampoline code for this exec type.
308	 */
309	p = imgp->proc;
310	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
311	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
312	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
313	    sizeof(char *));
314
315	/*
316	 * install sigcode
317	 */
318	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
319	    linux_szsigcode), linux_szsigcode);
320
321	/*
322	 * install LINUX_PLATFORM
323	 */
324	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
325	    linux_szplatform), linux_szplatform);
326
327	/*
328	 * If we have a valid auxargs ptr, prepare some room
329	 * on the stack.
330	 */
331	if (imgp->auxargs) {
332		/*
333		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
334		 * lower compatibility.
335		 */
336		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
337		    (LINUX_AT_COUNT * 2);
338		/*
339		 * The '+ 2' is for the null pointers at the end of each of
340		 * the arg and env vector sets,and imgp->auxarg_size is room
341		 * for argument of Runtime loader.
342		 */
343		vectp = (char **)(destp - (imgp->args->argc +
344		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
345	} else {
346		/*
347		 * The '+ 2' is for the null pointers at the end of each of
348		 * the arg and env vector sets
349		 */
350		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
351		    sizeof(char *));
352	}
353
354	/*
355	 * vectp also becomes our initial stack base
356	 */
357	stack_base = (register_t *)vectp;
358
359	stringp = imgp->args->begin_argv;
360	argc = imgp->args->argc;
361	envc = imgp->args->envc;
362
363	/*
364	 * Copy out strings - arguments and environment.
365	 */
366	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
367
368	/*
369	 * Fill in "ps_strings" struct for ps, w, etc.
370	 */
371	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
372	suword(&arginfo->ps_nargvstr, argc);
373
374	/*
375	 * Fill in argument portion of vector table.
376	 */
377	for (; argc > 0; --argc) {
378		suword(vectp++, (long)(intptr_t)destp);
379		while (*stringp++ != 0)
380			destp++;
381		destp++;
382	}
383
384	/* a null vector table pointer separates the argp's from the envp's */
385	suword(vectp++, 0);
386
387	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
388	suword(&arginfo->ps_nenvstr, envc);
389
390	/*
391	 * Fill in environment portion of vector table.
392	 */
393	for (; envc > 0; --envc) {
394		suword(vectp++, (long)(intptr_t)destp);
395		while (*stringp++ != 0)
396			destp++;
397		destp++;
398	}
399
400	/* end of vector table is a null pointer */
401	suword(vectp, 0);
402
403	return (stack_base);
404}
405
406
407
/* User code and data segment selectors loaded before entering a handler. */
extern int _ucodesel;
extern int _udatasel;
/* Offset added to the sigcode base to reach the rt signal trampoline. */
extern unsigned long linux_sznonrtsigcode;
410
411static void
412linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
413{
414	struct thread *td = curthread;
415	struct proc *p = td->td_proc;
416	struct sigacts *psp;
417	struct trapframe *regs;
418	struct l_rt_sigframe *fp, frame;
419	int sig, code;
420	int oonstack;
421
422	sig = ksi->ksi_signo;
423	code = ksi->ksi_code;
424	PROC_LOCK_ASSERT(p, MA_OWNED);
425	psp = p->p_sigacts;
426	mtx_assert(&psp->ps_mtx, MA_OWNED);
427	regs = td->td_frame;
428	oonstack = sigonstack(regs->tf_esp);
429
430#ifdef DEBUG
431	if (ldebug(rt_sendsig))
432		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
433		    catcher, sig, (void*)mask, code);
434#endif
435	/*
436	 * Allocate space for the signal handler context.
437	 */
438	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
439	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
440		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
441		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
442	} else
443		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
444	mtx_unlock(&psp->ps_mtx);
445
446	/*
447	 * Build the argument list for the signal handler.
448	 */
449	if (p->p_sysent->sv_sigtbl)
450		if (sig <= p->p_sysent->sv_sigsize)
451			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
452
453	bzero(&frame, sizeof(frame));
454
455	frame.sf_handler = catcher;
456	frame.sf_sig = sig;
457	frame.sf_siginfo = &fp->sf_si;
458	frame.sf_ucontext = &fp->sf_sc;
459
460	/* Fill in POSIX parts */
461	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
462
463	/*
464	 * Build the signal context to be used by sigreturn.
465	 */
466	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
467	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
468
469	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
470	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
471	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
472	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
473	PROC_UNLOCK(p);
474
475	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
476
477	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
478	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
479	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
480	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
481	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
482	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
483	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
484	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
485	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
486	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
487	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
488	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
489	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
490	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
491	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
492	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
493	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
494	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
495	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
496	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
497
498#ifdef DEBUG
499	if (ldebug(rt_sendsig))
500		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
501		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
502		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
503#endif
504
505	if (copyout(&frame, fp, sizeof(frame)) != 0) {
506		/*
507		 * Process has trashed its stack; give it an illegal
508		 * instruction to halt it in its tracks.
509		 */
510#ifdef DEBUG
511		if (ldebug(rt_sendsig))
512			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
513			    fp, oonstack);
514#endif
515		PROC_LOCK(p);
516		sigexit(td, SIGILL);
517	}
518
519	/*
520	 * Build context to run handler in.
521	 */
522	regs->tf_esp = (int)fp;
523	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
524	    linux_sznonrtsigcode;
525	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
526	regs->tf_cs = _ucodesel;
527	regs->tf_ds = _udatasel;
528	regs->tf_es = _udatasel;
529	regs->tf_fs = _udatasel;
530	regs->tf_ss = _udatasel;
531	PROC_LOCK(p);
532	mtx_lock(&psp->ps_mtx);
533}
534
535
536/*
537 * Send an interrupt to process.
538 *
539 * Stack is set up to allow sigcode stored
540 * in u. to call routine, followed by kcall
541 * to sigreturn routine below.  After sigreturn
542 * resets the signal mask, the stack, and the
543 * frame pointer, it returns to the user
544 * specified pc, psl.
545 */
546static void
547linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
548{
549	struct thread *td = curthread;
550	struct proc *p = td->td_proc;
551	struct sigacts *psp;
552	struct trapframe *regs;
553	struct l_sigframe *fp, frame;
554	l_sigset_t lmask;
555	int sig, code;
556	int oonstack, i;
557
558	PROC_LOCK_ASSERT(p, MA_OWNED);
559	psp = p->p_sigacts;
560	sig = ksi->ksi_signo;
561	code = ksi->ksi_code;
562	mtx_assert(&psp->ps_mtx, MA_OWNED);
563	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
564		/* Signal handler installed with SA_SIGINFO. */
565		linux_rt_sendsig(catcher, ksi, mask);
566		return;
567	}
568	regs = td->td_frame;
569	oonstack = sigonstack(regs->tf_esp);
570
571#ifdef DEBUG
572	if (ldebug(sendsig))
573		printf(ARGS(sendsig, "%p, %d, %p, %u"),
574		    catcher, sig, (void*)mask, code);
575#endif
576
577	/*
578	 * Allocate space for the signal handler context.
579	 */
580	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
581	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
582		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
583		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
584	} else
585		fp = (struct l_sigframe *)regs->tf_esp - 1;
586	mtx_unlock(&psp->ps_mtx);
587	PROC_UNLOCK(p);
588
589	/*
590	 * Build the argument list for the signal handler.
591	 */
592	if (p->p_sysent->sv_sigtbl)
593		if (sig <= p->p_sysent->sv_sigsize)
594			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
595
596	bzero(&frame, sizeof(frame));
597
598	frame.sf_handler = catcher;
599	frame.sf_sig = sig;
600
601	bsd_to_linux_sigset(mask, &lmask);
602
603	/*
604	 * Build the signal context to be used by sigreturn.
605	 */
606	frame.sf_sc.sc_mask   = lmask.__bits[0];
607	frame.sf_sc.sc_gs     = rgs();
608	frame.sf_sc.sc_fs     = regs->tf_fs;
609	frame.sf_sc.sc_es     = regs->tf_es;
610	frame.sf_sc.sc_ds     = regs->tf_ds;
611	frame.sf_sc.sc_edi    = regs->tf_edi;
612	frame.sf_sc.sc_esi    = regs->tf_esi;
613	frame.sf_sc.sc_ebp    = regs->tf_ebp;
614	frame.sf_sc.sc_ebx    = regs->tf_ebx;
615	frame.sf_sc.sc_edx    = regs->tf_edx;
616	frame.sf_sc.sc_ecx    = regs->tf_ecx;
617	frame.sf_sc.sc_eax    = regs->tf_eax;
618	frame.sf_sc.sc_eip    = regs->tf_eip;
619	frame.sf_sc.sc_cs     = regs->tf_cs;
620	frame.sf_sc.sc_eflags = regs->tf_eflags;
621	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
622	frame.sf_sc.sc_ss     = regs->tf_ss;
623	frame.sf_sc.sc_err    = regs->tf_err;
624	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
625	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
626
627	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
628		frame.sf_extramask[i] = lmask.__bits[i+1];
629
630	if (copyout(&frame, fp, sizeof(frame)) != 0) {
631		/*
632		 * Process has trashed its stack; give it an illegal
633		 * instruction to halt it in its tracks.
634		 */
635		PROC_LOCK(p);
636		sigexit(td, SIGILL);
637	}
638
639	/*
640	 * Build context to run handler in.
641	 */
642	regs->tf_esp = (int)fp;
643	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
644	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
645	regs->tf_cs = _ucodesel;
646	regs->tf_ds = _udatasel;
647	regs->tf_es = _udatasel;
648	regs->tf_fs = _udatasel;
649	regs->tf_ss = _udatasel;
650	PROC_LOCK(p);
651	mtx_lock(&psp->ps_mtx);
652}
653
654/*
655 * System call to cleanup state after a signal
656 * has been taken.  Reset signal mask and
657 * stack state from context left by sendsig (above).
658 * Return to previous pc and psl as specified by
659 * context left by sendsig. Check carefully to
660 * make sure that the user has not modified the
661 * psl to gain improper privileges or to cause
662 * a machine fault.
663 */
664int
665linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
666{
667	struct l_sigframe frame;
668	struct trapframe *regs;
669	l_sigset_t lmask;
670	sigset_t bmask;
671	int eflags, i;
672	ksiginfo_t ksi;
673
674	regs = td->td_frame;
675
676#ifdef DEBUG
677	if (ldebug(sigreturn))
678		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
679#endif
680	/*
681	 * The trampoline code hands us the sigframe.
682	 * It is unsafe to keep track of it ourselves, in the event that a
683	 * program jumps out of a signal handler.
684	 */
685	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
686		return (EFAULT);
687
688	/*
689	 * Check for security violations.
690	 */
691#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
692	eflags = frame.sf_sc.sc_eflags;
693	/*
694	 * XXX do allow users to change the privileged flag PSL_RF.  The
695	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
696	 * sometimes set it there too.  tf_eflags is kept in the signal
697	 * context during signal handling and there is no other place
698	 * to remember it, so the PSL_RF bit may be corrupted by the
699	 * signal handler without us knowing.  Corruption of the PSL_RF
700	 * bit at worst causes one more or one less debugger trap, so
701	 * allowing it is fairly harmless.
702	 */
703	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
704		return(EINVAL);
705
706	/*
707	 * Don't allow users to load a valid privileged %cs.  Let the
708	 * hardware check for invalid selectors, excess privilege in
709	 * other selectors, invalid %eip's and invalid %esp's.
710	 */
711#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
712	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
713		ksiginfo_init_trap(&ksi);
714		ksi.ksi_signo = SIGBUS;
715		ksi.ksi_code = BUS_OBJERR;
716		ksi.ksi_trapno = T_PROTFLT;
717		ksi.ksi_addr = (void *)regs->tf_eip;
718		trapsignal(td, &ksi);
719		return(EINVAL);
720	}
721
722	lmask.__bits[0] = frame.sf_sc.sc_mask;
723	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
724		lmask.__bits[i+1] = frame.sf_extramask[i];
725	linux_to_bsd_sigset(&lmask, &bmask);
726	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
727
728	/*
729	 * Restore signal context.
730	 */
731	/* %gs was restored by the trampoline. */
732	regs->tf_fs     = frame.sf_sc.sc_fs;
733	regs->tf_es     = frame.sf_sc.sc_es;
734	regs->tf_ds     = frame.sf_sc.sc_ds;
735	regs->tf_edi    = frame.sf_sc.sc_edi;
736	regs->tf_esi    = frame.sf_sc.sc_esi;
737	regs->tf_ebp    = frame.sf_sc.sc_ebp;
738	regs->tf_ebx    = frame.sf_sc.sc_ebx;
739	regs->tf_edx    = frame.sf_sc.sc_edx;
740	regs->tf_ecx    = frame.sf_sc.sc_ecx;
741	regs->tf_eax    = frame.sf_sc.sc_eax;
742	regs->tf_eip    = frame.sf_sc.sc_eip;
743	regs->tf_cs     = frame.sf_sc.sc_cs;
744	regs->tf_eflags = eflags;
745	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
746	regs->tf_ss     = frame.sf_sc.sc_ss;
747
748	return (EJUSTRETURN);
749}
750
751/*
752 * System call to cleanup state after a signal
753 * has been taken.  Reset signal mask and
754 * stack state from context left by rt_sendsig (above).
755 * Return to previous pc and psl as specified by
756 * context left by sendsig. Check carefully to
757 * make sure that the user has not modified the
758 * psl to gain improper privileges or to cause
759 * a machine fault.
760 */
761int
762linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
763{
764	struct l_ucontext uc;
765	struct l_sigcontext *context;
766	sigset_t bmask;
767	l_stack_t *lss;
768	stack_t ss;
769	struct trapframe *regs;
770	int eflags;
771	ksiginfo_t ksi;
772
773	regs = td->td_frame;
774
775#ifdef DEBUG
776	if (ldebug(rt_sigreturn))
777		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
778#endif
779	/*
780	 * The trampoline code hands us the ucontext.
781	 * It is unsafe to keep track of it ourselves, in the event that a
782	 * program jumps out of a signal handler.
783	 */
784	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
785		return (EFAULT);
786
787	context = &uc.uc_mcontext;
788
789	/*
790	 * Check for security violations.
791	 */
792#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
793	eflags = context->sc_eflags;
794	/*
795	 * XXX do allow users to change the privileged flag PSL_RF.  The
796	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
797	 * sometimes set it there too.  tf_eflags is kept in the signal
798	 * context during signal handling and there is no other place
799	 * to remember it, so the PSL_RF bit may be corrupted by the
800	 * signal handler without us knowing.  Corruption of the PSL_RF
801	 * bit at worst causes one more or one less debugger trap, so
802	 * allowing it is fairly harmless.
803	 */
804	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
805		return(EINVAL);
806
807	/*
808	 * Don't allow users to load a valid privileged %cs.  Let the
809	 * hardware check for invalid selectors, excess privilege in
810	 * other selectors, invalid %eip's and invalid %esp's.
811	 */
812#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
813	if (!CS_SECURE(context->sc_cs)) {
814		ksiginfo_init_trap(&ksi);
815		ksi.ksi_signo = SIGBUS;
816		ksi.ksi_code = BUS_OBJERR;
817		ksi.ksi_trapno = T_PROTFLT;
818		ksi.ksi_addr = (void *)regs->tf_eip;
819		trapsignal(td, &ksi);
820		return(EINVAL);
821	}
822
823	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
824	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
825
826	/*
827	 * Restore signal context
828	 */
829	/* %gs was restored by the trampoline. */
830	regs->tf_fs     = context->sc_fs;
831	regs->tf_es     = context->sc_es;
832	regs->tf_ds     = context->sc_ds;
833	regs->tf_edi    = context->sc_edi;
834	regs->tf_esi    = context->sc_esi;
835	regs->tf_ebp    = context->sc_ebp;
836	regs->tf_ebx    = context->sc_ebx;
837	regs->tf_edx    = context->sc_edx;
838	regs->tf_ecx    = context->sc_ecx;
839	regs->tf_eax    = context->sc_eax;
840	regs->tf_eip    = context->sc_eip;
841	regs->tf_cs     = context->sc_cs;
842	regs->tf_eflags = eflags;
843	regs->tf_esp    = context->sc_esp_at_signal;
844	regs->tf_ss     = context->sc_ss;
845
846	/*
847	 * call sigaltstack & ignore results..
848	 */
849	lss = &uc.uc_stack;
850	ss.ss_sp = lss->ss_sp;
851	ss.ss_size = lss->ss_size;
852	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
853
854#ifdef DEBUG
855	if (ldebug(rt_sigreturn))
856		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
857		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
858#endif
859	(void)kern_sigaltstack(td, &ss, NULL);
860
861	return (EJUSTRETURN);
862}
863
864static int
865linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
866{
867	struct proc *p;
868	struct trapframe *frame;
869
870	p = td->td_proc;
871	frame = td->td_frame;
872
873	sa->code = frame->tf_eax;
874	sa->args[0] = frame->tf_ebx;
875	sa->args[1] = frame->tf_ecx;
876	sa->args[2] = frame->tf_edx;
877	sa->args[3] = frame->tf_esi;
878	sa->args[4] = frame->tf_edi;
879	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
880
881	if (sa->code >= p->p_sysent->sv_size)
882		sa->callp = &p->p_sysent->sv_table[0];
883 	else
884 		sa->callp = &p->p_sysent->sv_table[sa->code];
885	sa->narg = sa->callp->sy_narg;
886
887	td->td_retval[0] = 0;
888	td->td_retval[1] = frame->tf_edx;
889
890	return (0);
891}
892
893/*
894 * If a linux binary is exec'ing something, try this image activator
895 * first.  We override standard shell script execution in order to
896 * be able to modify the interpreter path.  We only do this if a linux
897 * binary is doing the exec, so we do not create an EXEC module for it.
898 */
899static int	exec_linux_imgact_try(struct image_params *iparams);
900
901static int
902exec_linux_imgact_try(struct image_params *imgp)
903{
904    const char *head = (const char *)imgp->image_header;
905    char *rpath;
906    int error = -1;
907
908    /*
909     * The interpreter for shell scripts run from a linux binary needs
910     * to be located in /compat/linux if possible in order to recursively
911     * maintain linux path emulation.
912     */
913    if (((const short *)head)[0] == SHELLMAGIC) {
914	    /*
915	     * Run our normal shell image activator.  If it succeeds attempt
916	     * to use the alternate path for the interpreter.  If an alternate
917	     * path is found, use our stringspace to store it.
918	     */
919	    if ((error = exec_shell_imgact(imgp)) == 0) {
920		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
921			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
922		    if (rpath != NULL)
923			    imgp->args->fname_buf =
924				imgp->interpreter_name = rpath;
925	    }
926    }
927    return (error);
928}
929
930/*
931 * exec_setregs may initialize some registers differently than Linux
932 * does, thus potentially confusing Linux binaries. If necessary, we
933 * override the exec_setregs default(s) here.
934 */
935static void
936exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
937{
938	struct pcb *pcb = td->td_pcb;
939
940	exec_setregs(td, imgp, stack);
941
942	/* Linux sets %gs to 0, we default to _udatasel */
943	pcb->pcb_gs = 0;
944	load_gs(0);
945
946	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
947}
948
949static void
950linux_get_machine(const char **dst)
951{
952
953	switch (cpu_class) {
954	case CPUCLASS_686:
955		*dst = "i686";
956		break;
957	case CPUCLASS_586:
958		*dst = "i586";
959		break;
960	case CPUCLASS_486:
961		*dst = "i486";
962		break;
963	default:
964		*dst = "i386";
965	}
966}
967
968struct sysentvec linux_sysvec = {
969	.sv_size	= LINUX_SYS_MAXSYSCALL,
970	.sv_table	= linux_sysent,
971	.sv_mask	= 0,
972	.sv_sigsize	= LINUX_SIGTBLSZ,
973	.sv_sigtbl	= bsd_to_linux_signal,
974	.sv_errsize	= ELAST + 1,
975	.sv_errtbl	= bsd_to_linux_errno,
976	.sv_transtrap	= translate_traps,
977	.sv_fixup	= linux_fixup,
978	.sv_sendsig	= linux_sendsig,
979	.sv_sigcode	= linux_sigcode,
980	.sv_szsigcode	= &linux_szsigcode,
981	.sv_prepsyscall	= NULL,
982	.sv_name	= "Linux a.out",
983	.sv_coredump	= NULL,
984	.sv_imgact_try	= exec_linux_imgact_try,
985	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
986	.sv_pagesize	= PAGE_SIZE,
987	.sv_minuser	= VM_MIN_ADDRESS,
988	.sv_maxuser	= VM_MAXUSER_ADDRESS,
989	.sv_usrstack	= USRSTACK,
990	.sv_psstrings	= PS_STRINGS,
991	.sv_stackprot	= VM_PROT_ALL,
992	.sv_copyout_strings = exec_copyout_strings,
993	.sv_setregs	= exec_linux_setregs,
994	.sv_fixlimit	= NULL,
995	.sv_maxssiz	= NULL,
996	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
997	.sv_set_syscall_retval = cpu_set_syscall_retval,
998	.sv_fetch_syscall_args = linux_fetch_syscall_args,
999	.sv_syscallnames = NULL,
1000	.sv_schedtail	= linux_schedtail,
1001};
1002
1003struct sysentvec elf_linux_sysvec = {
1004	.sv_size	= LINUX_SYS_MAXSYSCALL,
1005	.sv_table	= linux_sysent,
1006	.sv_mask	= 0,
1007	.sv_sigsize	= LINUX_SIGTBLSZ,
1008	.sv_sigtbl	= bsd_to_linux_signal,
1009	.sv_errsize	= ELAST + 1,
1010	.sv_errtbl	= bsd_to_linux_errno,
1011	.sv_transtrap	= translate_traps,
1012	.sv_fixup	= elf_linux_fixup,
1013	.sv_sendsig	= linux_sendsig,
1014	.sv_sigcode	= linux_sigcode,
1015	.sv_szsigcode	= &linux_szsigcode,
1016	.sv_prepsyscall	= NULL,
1017	.sv_name	= "Linux ELF",
1018	.sv_coredump	= elf32_coredump,
1019	.sv_imgact_try	= exec_linux_imgact_try,
1020	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1021	.sv_pagesize	= PAGE_SIZE,
1022	.sv_minuser	= VM_MIN_ADDRESS,
1023	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1024	.sv_usrstack	= USRSTACK,
1025	.sv_psstrings	= PS_STRINGS,
1026	.sv_stackprot	= VM_PROT_ALL,
1027	.sv_copyout_strings = linux_copyout_strings,
1028	.sv_setregs	= exec_linux_setregs,
1029	.sv_fixlimit	= NULL,
1030	.sv_maxssiz	= NULL,
1031	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32,
1032	.sv_set_syscall_retval = cpu_set_syscall_retval,
1033	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1034	.sv_syscallnames = NULL,
1035	.sv_schedtail	= linux_schedtail,
1036};
1037
/*
 * Constants used to recognize the GNU ABI note of Linux binaries: the
 * vendor name stored in the brand note, and the value that the first
 * descriptor word must have for linux_trans_osrel() to accept the note.
 * Neither is ever modified, so both are const.
 */
static const char GNU_ABI_VENDOR[] = "GNU";
static const int GNULINUX_ABI_DESC = 0;
1040
1041static boolean_t
1042linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1043{
1044	const Elf32_Word *desc;
1045	uintptr_t p;
1046
1047	p = (uintptr_t)(note + 1);
1048	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1049
1050	desc = (const Elf32_Word *)p;
1051	if (desc[0] != GNULINUX_ABI_DESC)
1052		return (FALSE);
1053
1054	/*
1055	 * For linux we encode osrel as follows (see linux_mib.c):
1056	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1057	 */
1058	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1059
1060	return (TRUE);
1061}
1062
1063static Elf_Brandnote linux_brandnote = {
1064	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1065	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1066	.hdr.n_type	= 1,
1067	.vendor		= GNU_ABI_VENDOR,
1068	.flags		= BN_TRANSLATE_OSREL,
1069	.trans_osrel	= linux_trans_osrel
1070};
1071
1072static Elf32_Brandinfo linux_brand = {
1073	.brand		= ELFOSABI_LINUX,
1074	.machine	= EM_386,
1075	.compat_3_brand	= "Linux",
1076	.emul_path	= "/compat/linux",
1077	.interp_path	= "/lib/ld-linux.so.1",
1078	.sysvec		= &elf_linux_sysvec,
1079	.interp_newpath	= NULL,
1080	.brand_note	= &linux_brandnote,
1081	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1082};
1083
1084static Elf32_Brandinfo linux_glibc2brand = {
1085	.brand		= ELFOSABI_LINUX,
1086	.machine	= EM_386,
1087	.compat_3_brand	= "Linux",
1088	.emul_path	= "/compat/linux",
1089	.interp_path	= "/lib/ld-linux.so.2",
1090	.sysvec		= &elf_linux_sysvec,
1091	.interp_newpath	= NULL,
1092	.brand_note	= &linux_brandnote,
1093	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1094};
1095
1096Elf32_Brandinfo *linux_brandlist[] = {
1097	&linux_brand,
1098	&linux_glibc2brand,
1099	NULL
1100};
1101
1102static int
1103linux_elf_modevent(module_t mod, int type, void *data)
1104{
1105	Elf32_Brandinfo **brandinfo;
1106	int error;
1107	struct linux_ioctl_handler **lihp;
1108	struct linux_device_handler **ldhp;
1109
1110	error = 0;
1111
1112	switch(type) {
1113	case MOD_LOAD:
1114		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1115		     ++brandinfo)
1116			if (elf32_insert_brand_entry(*brandinfo) < 0)
1117				error = EINVAL;
1118		if (error == 0) {
1119			SET_FOREACH(lihp, linux_ioctl_handler_set)
1120				linux_ioctl_register_handler(*lihp);
1121			SET_FOREACH(ldhp, linux_device_handler_set)
1122				linux_device_register_handler(*ldhp);
1123			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1124			sx_init(&emul_shared_lock, "emuldata->shared lock");
1125			LIST_INIT(&futex_list);
1126			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1127			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1128			      NULL, 1000);
1129			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1130			      NULL, 1000);
1131			linux_get_machine(&linux_platform);
1132			linux_szplatform = roundup(strlen(linux_platform) + 1,
1133			    sizeof(char *));
1134			linux_osd_jail_register();
1135			stclohz = (stathz ? stathz : hz);
1136			if (bootverbose)
1137				printf("Linux ELF exec handler installed\n");
1138		} else
1139			printf("cannot insert Linux ELF brand handler\n");
1140		break;
1141	case MOD_UNLOAD:
1142		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143		     ++brandinfo)
1144			if (elf32_brand_inuse(*brandinfo))
1145				error = EBUSY;
1146		if (error == 0) {
1147			for (brandinfo = &linux_brandlist[0];
1148			     *brandinfo != NULL; ++brandinfo)
1149				if (elf32_remove_brand_entry(*brandinfo) < 0)
1150					error = EINVAL;
1151		}
1152		if (error == 0) {
1153			SET_FOREACH(lihp, linux_ioctl_handler_set)
1154				linux_ioctl_unregister_handler(*lihp);
1155			SET_FOREACH(ldhp, linux_device_handler_set)
1156				linux_device_unregister_handler(*ldhp);
1157			mtx_destroy(&emul_lock);
1158			sx_destroy(&emul_shared_lock);
1159			mtx_destroy(&futex_mtx);
1160			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1161			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1162			linux_osd_jail_deregister();
1163			if (bootverbose)
1164				printf("Linux ELF exec handler removed\n");
1165		} else
1166			printf("Could not deinstall ELF interpreter entry\n");
1167		break;
1168	default:
1169		return EOPNOTSUPP;
1170	}
1171	return error;
1172}
1173
1174static moduledata_t linux_elf_mod = {
1175	"linuxelf",
1176	linux_elf_modevent,
1177	0
1178};
1179
1180DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1181