linux32_sysvec.c revision 294904
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer
14 *    in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: releng/10.2/sys/amd64/linux32/linux32_sysvec.c 294904 2016-01-27 07:41:31Z delphij $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define	__ELF_WORD_SIZE	32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86
87MODULE_VERSION(linux, 1);
88
89MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
90
/*
 * Write one auxiliary-vector entry -- an id word followed by a value
 * word -- through suword32() at `pos', and leave `pos' advanced by two
 * elements.  `pos' must be a modifiable pointer lvalue.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos), (id));		\
		suword32((pos) + 1, (val));	\
		(pos) += 2;			\
	} while (0)
96
/*
 * The two characters "#!" as they appear when the start of an image
 * header is read as a single 16-bit quantity.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123	/* #! */
#endif
102
/*
 * The sendsig routines are not system calls, yet they want to use the
 * ldebug() facility.  Give each of them syscall number 0 for that
 * purpose; this is slightly less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
111
112const char *linux_platform = "i686";
113static int linux_szplatform;
114extern char linux_sigcode[];
115extern int linux_szsigcode;
116
117extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
118
119SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
120SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
121
122static int	elf_linux_fixup(register_t **stack_base,
123		    struct image_params *iparams);
124static register_t *linux_copyout_strings(struct image_params *imgp);
125static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126static void	exec_linux_setregs(struct thread *td,
127				   struct image_params *imgp, u_long stack);
128static void	linux32_fixlimit(struct rlimit *rl, int which);
129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130
131static eventhandler_tag linux_exit_tag;
132static eventhandler_tag linux_exec_tag;
133
134/*
135 * Linux syscalls return negative errno's, we do positive and map them
136 * Reference:
137 *   FreeBSD: src/sys/sys/errno.h
138 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
139 *            linux-2.6.17.8/include/asm-generic/errno.h
140 */
141static int bsd_to_linux_errno[ELAST + 1] = {
142	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
143	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
150	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
151	 -72, -67, -71
152};
153
154int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162	0, LINUX_SIGUSR1, LINUX_SIGUSR2
163};
164
165int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166	SIGHUP, SIGINT, SIGQUIT, SIGILL,
167	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173	SIGIO, SIGURG, SIGSYS
174};
175
#define LINUX_T_UNKNOWN  255

/*
 * Linux trap number for each FreeBSD trap code, indexed by the FreeBSD
 * T_* value.  Slots with no Linux counterpart hold LINUX_T_UNKNOWN.
 * The table is never written, so it is const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code into the Linux trap number.
 *
 * Returns the table entry for `code', or LINUX_T_UNKNOWN when `code' is
 * negative or beyond the end of the table.  Implemented as a function
 * (rather than a macro) so the argument is evaluated exactly once and
 * the range check does not depend on implicit signed/unsigned conversion.
 */
static __inline int
bsd_to_linux_trapcode(int code)
{
	const size_t nentries = sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]);

	if (code < 0 || (size_t)code >= nentries)
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
214
215struct linux32_ps_strings {
216	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
217	u_int ps_nargvstr;	/* the number of argument strings */
218	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
219	u_int ps_nenvstr;	/* the number of environment strings */
220};
221
222/*
223 * If FreeBSD & Linux have a difference of opinion about what a trap
224 * means, deal with it here.
225 *
226 * MPSAFE
227 */
228static int
229translate_traps(int signal, int trap_code)
230{
231	if (signal != SIGBUS)
232		return signal;
233	switch (trap_code) {
234	case T_PROTFLT:
235	case T_TSSFLT:
236	case T_DOUBLEFLT:
237	case T_PAGEFLT:
238		return SIGSEGV;
239	default:
240		return signal;
241	}
242}
243
244static int
245elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246{
247	Elf32_Auxargs *args;
248	Elf32_Addr *base;
249	Elf32_Addr *pos, *uplatform;
250	struct linux32_ps_strings *arginfo;
251	int issetugid;
252
253	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
255
256	KASSERT(curthread->td_proc == imgp->proc,
257	    ("unsafe elf_linux_fixup(), should be curproc"));
258	base = (Elf32_Addr *)*stack_base;
259	args = (Elf32_Auxargs *)imgp->auxargs;
260	pos = base + (imgp->args->argc + imgp->args->envc + 2);
261
262	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
263	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
264
265	/*
266	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
267	 * as it has appeared in the 2.4.0-rc7 first time.
268	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
269	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
270	 * is not present.
271	 * Also see linux_times() implementation.
272	 */
273	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
274		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
275	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
276	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
277	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
278	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
279	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
280	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
281	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
282	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, issetugid);
283	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
284	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
285	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
286	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
287	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
288	if (args->execfd != -1)
289		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
290	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
291
292	free(imgp->auxargs, M_TEMP);
293	imgp->auxargs = NULL;
294
295	base--;
296	suword32(base, (uint32_t)imgp->args->argc);
297	*stack_base = (register_t *)base;
298	return 0;
299}
300
301extern unsigned long linux_sznonrtsigcode;
302
303static void
304linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
305{
306	struct thread *td = curthread;
307	struct proc *p = td->td_proc;
308	struct sigacts *psp;
309	struct trapframe *regs;
310	struct l_rt_sigframe *fp, frame;
311	int oonstack;
312	int sig;
313	int code;
314
315	sig = ksi->ksi_signo;
316	code = ksi->ksi_code;
317	PROC_LOCK_ASSERT(p, MA_OWNED);
318	psp = p->p_sigacts;
319	mtx_assert(&psp->ps_mtx, MA_OWNED);
320	regs = td->td_frame;
321	oonstack = sigonstack(regs->tf_rsp);
322
323#ifdef DEBUG
324	if (ldebug(rt_sendsig))
325		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
326		    catcher, sig, (void*)mask, code);
327#endif
328	/*
329	 * Allocate space for the signal handler context.
330	 */
331	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
332	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
333		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
334		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
335	} else
336		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
337	mtx_unlock(&psp->ps_mtx);
338
339	/*
340	 * Build the argument list for the signal handler.
341	 */
342	if (p->p_sysent->sv_sigtbl)
343		if (sig <= p->p_sysent->sv_sigsize)
344			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
345
346	bzero(&frame, sizeof(frame));
347
348	frame.sf_handler = PTROUT(catcher);
349	frame.sf_sig = sig;
350	frame.sf_siginfo = PTROUT(&fp->sf_si);
351	frame.sf_ucontext = PTROUT(&fp->sf_sc);
352
353	/* Fill in POSIX parts */
354	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
355
356	/*
357	 * Build the signal context to be used by sigreturn.
358	 */
359	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
360	frame.sf_sc.uc_link = 0;		/* XXX ??? */
361
362	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
363	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
364	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
365	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
366	PROC_UNLOCK(p);
367
368	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
369
370	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
371	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
372	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
373	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
374	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
375	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
376	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
377	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
378	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
379	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
380	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
381	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
382	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
383	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
384	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
385	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
386	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
387	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
388	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
389	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
390
391#ifdef DEBUG
392	if (ldebug(rt_sendsig))
393		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
394		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
395		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
396#endif
397
398	if (copyout(&frame, fp, sizeof(frame)) != 0) {
399		/*
400		 * Process has trashed its stack; give it an illegal
401		 * instruction to halt it in its tracks.
402		 */
403#ifdef DEBUG
404		if (ldebug(rt_sendsig))
405			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
406			    fp, oonstack);
407#endif
408		PROC_LOCK(p);
409		sigexit(td, SIGILL);
410	}
411
412	/*
413	 * Build context to run handler in.
414	 */
415	regs->tf_rsp = PTROUT(fp);
416	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
417	regs->tf_rflags &= ~(PSL_T | PSL_D);
418	regs->tf_cs = _ucode32sel;
419	regs->tf_ss = _udatasel;
420	regs->tf_ds = _udatasel;
421	regs->tf_es = _udatasel;
422	regs->tf_fs = _ufssel;
423	regs->tf_gs = _ugssel;
424	regs->tf_flags = TF_HASSEGS;
425	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
426	PROC_LOCK(p);
427	mtx_lock(&psp->ps_mtx);
428}
429
430
431/*
432 * Send an interrupt to process.
433 *
434 * Stack is set up to allow sigcode stored
435 * in u. to call routine, followed by kcall
436 * to sigreturn routine below.  After sigreturn
437 * resets the signal mask, the stack, and the
438 * frame pointer, it returns to the user
439 * specified pc, psl.
440 */
441static void
442linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
443{
444	struct thread *td = curthread;
445	struct proc *p = td->td_proc;
446	struct sigacts *psp;
447	struct trapframe *regs;
448	struct l_sigframe *fp, frame;
449	l_sigset_t lmask;
450	int oonstack, i;
451	int sig, code;
452
453	sig = ksi->ksi_signo;
454	code = ksi->ksi_code;
455	PROC_LOCK_ASSERT(p, MA_OWNED);
456	psp = p->p_sigacts;
457	mtx_assert(&psp->ps_mtx, MA_OWNED);
458	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
459		/* Signal handler installed with SA_SIGINFO. */
460		linux_rt_sendsig(catcher, ksi, mask);
461		return;
462	}
463
464	regs = td->td_frame;
465	oonstack = sigonstack(regs->tf_rsp);
466
467#ifdef DEBUG
468	if (ldebug(sendsig))
469		printf(ARGS(sendsig, "%p, %d, %p, %u"),
470		    catcher, sig, (void*)mask, code);
471#endif
472
473	/*
474	 * Allocate space for the signal handler context.
475	 */
476	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
477	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
478		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
479		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
480	} else
481		fp = (struct l_sigframe *)regs->tf_rsp - 1;
482	mtx_unlock(&psp->ps_mtx);
483	PROC_UNLOCK(p);
484
485	/*
486	 * Build the argument list for the signal handler.
487	 */
488	if (p->p_sysent->sv_sigtbl)
489		if (sig <= p->p_sysent->sv_sigsize)
490			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
491
492	bzero(&frame, sizeof(frame));
493
494	frame.sf_handler = PTROUT(catcher);
495	frame.sf_sig = sig;
496
497	bsd_to_linux_sigset(mask, &lmask);
498
499	/*
500	 * Build the signal context to be used by sigreturn.
501	 */
502	frame.sf_sc.sc_mask   = lmask.__bits[0];
503	frame.sf_sc.sc_gs     = regs->tf_gs;
504	frame.sf_sc.sc_fs     = regs->tf_fs;
505	frame.sf_sc.sc_es     = regs->tf_es;
506	frame.sf_sc.sc_ds     = regs->tf_ds;
507	frame.sf_sc.sc_edi    = regs->tf_rdi;
508	frame.sf_sc.sc_esi    = regs->tf_rsi;
509	frame.sf_sc.sc_ebp    = regs->tf_rbp;
510	frame.sf_sc.sc_ebx    = regs->tf_rbx;
511	frame.sf_sc.sc_edx    = regs->tf_rdx;
512	frame.sf_sc.sc_ecx    = regs->tf_rcx;
513	frame.sf_sc.sc_eax    = regs->tf_rax;
514	frame.sf_sc.sc_eip    = regs->tf_rip;
515	frame.sf_sc.sc_cs     = regs->tf_cs;
516	frame.sf_sc.sc_eflags = regs->tf_rflags;
517	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
518	frame.sf_sc.sc_ss     = regs->tf_ss;
519	frame.sf_sc.sc_err    = regs->tf_err;
520	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
521	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
522
523	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
524		frame.sf_extramask[i] = lmask.__bits[i+1];
525
526	if (copyout(&frame, fp, sizeof(frame)) != 0) {
527		/*
528		 * Process has trashed its stack; give it an illegal
529		 * instruction to halt it in its tracks.
530		 */
531		PROC_LOCK(p);
532		sigexit(td, SIGILL);
533	}
534
535	/*
536	 * Build context to run handler in.
537	 */
538	regs->tf_rsp = PTROUT(fp);
539	regs->tf_rip = p->p_sysent->sv_sigcode_base;
540	regs->tf_rflags &= ~(PSL_T | PSL_D);
541	regs->tf_cs = _ucode32sel;
542	regs->tf_ss = _udatasel;
543	regs->tf_ds = _udatasel;
544	regs->tf_es = _udatasel;
545	regs->tf_fs = _ufssel;
546	regs->tf_gs = _ugssel;
547	regs->tf_flags = TF_HASSEGS;
548	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
549	PROC_LOCK(p);
550	mtx_lock(&psp->ps_mtx);
551}
552
553/*
554 * System call to cleanup state after a signal
555 * has been taken.  Reset signal mask and
556 * stack state from context left by sendsig (above).
557 * Return to previous pc and psl as specified by
558 * context left by sendsig. Check carefully to
559 * make sure that the user has not modified the
560 * psl to gain improper privileges or to cause
561 * a machine fault.
562 */
563int
564linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
565{
566	struct l_sigframe frame;
567	struct trapframe *regs;
568	sigset_t bmask;
569	l_sigset_t lmask;
570	int eflags, i;
571	ksiginfo_t ksi;
572
573	regs = td->td_frame;
574
575#ifdef DEBUG
576	if (ldebug(sigreturn))
577		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
578#endif
579	/*
580	 * The trampoline code hands us the sigframe.
581	 * It is unsafe to keep track of it ourselves, in the event that a
582	 * program jumps out of a signal handler.
583	 */
584	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
585		return (EFAULT);
586
587	/*
588	 * Check for security violations.
589	 */
590#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
591	eflags = frame.sf_sc.sc_eflags;
592	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
593		return(EINVAL);
594
595	/*
596	 * Don't allow users to load a valid privileged %cs.  Let the
597	 * hardware check for invalid selectors, excess privilege in
598	 * other selectors, invalid %eip's and invalid %esp's.
599	 */
600#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
601	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
602		ksiginfo_init_trap(&ksi);
603		ksi.ksi_signo = SIGBUS;
604		ksi.ksi_code = BUS_OBJERR;
605		ksi.ksi_trapno = T_PROTFLT;
606		ksi.ksi_addr = (void *)regs->tf_rip;
607		trapsignal(td, &ksi);
608		return(EINVAL);
609	}
610
611	lmask.__bits[0] = frame.sf_sc.sc_mask;
612	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
613		lmask.__bits[i+1] = frame.sf_extramask[i];
614	linux_to_bsd_sigset(&lmask, &bmask);
615	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
616
617	/*
618	 * Restore signal context.
619	 */
620	regs->tf_rdi    = frame.sf_sc.sc_edi;
621	regs->tf_rsi    = frame.sf_sc.sc_esi;
622	regs->tf_rbp    = frame.sf_sc.sc_ebp;
623	regs->tf_rbx    = frame.sf_sc.sc_ebx;
624	regs->tf_rdx    = frame.sf_sc.sc_edx;
625	regs->tf_rcx    = frame.sf_sc.sc_ecx;
626	regs->tf_rax    = frame.sf_sc.sc_eax;
627	regs->tf_rip    = frame.sf_sc.sc_eip;
628	regs->tf_cs     = frame.sf_sc.sc_cs;
629	regs->tf_ds     = frame.sf_sc.sc_ds;
630	regs->tf_es     = frame.sf_sc.sc_es;
631	regs->tf_fs     = frame.sf_sc.sc_fs;
632	regs->tf_gs     = frame.sf_sc.sc_gs;
633	regs->tf_rflags = eflags;
634	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
635	regs->tf_ss     = frame.sf_sc.sc_ss;
636	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
637
638	return (EJUSTRETURN);
639}
640
641/*
642 * System call to cleanup state after a signal
643 * has been taken.  Reset signal mask and
644 * stack state from context left by rt_sendsig (above).
645 * Return to previous pc and psl as specified by
646 * context left by sendsig. Check carefully to
647 * make sure that the user has not modified the
648 * psl to gain improper privileges or to cause
649 * a machine fault.
650 */
651int
652linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
653{
654	struct l_ucontext uc;
655	struct l_sigcontext *context;
656	sigset_t bmask;
657	l_stack_t *lss;
658	stack_t ss;
659	struct trapframe *regs;
660	int eflags;
661	ksiginfo_t ksi;
662
663	regs = td->td_frame;
664
665#ifdef DEBUG
666	if (ldebug(rt_sigreturn))
667		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
668#endif
669	/*
670	 * The trampoline code hands us the ucontext.
671	 * It is unsafe to keep track of it ourselves, in the event that a
672	 * program jumps out of a signal handler.
673	 */
674	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
675		return (EFAULT);
676
677	context = &uc.uc_mcontext;
678
679	/*
680	 * Check for security violations.
681	 */
682#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
683	eflags = context->sc_eflags;
684	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
685		return(EINVAL);
686
687	/*
688	 * Don't allow users to load a valid privileged %cs.  Let the
689	 * hardware check for invalid selectors, excess privilege in
690	 * other selectors, invalid %eip's and invalid %esp's.
691	 */
692#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
693	if (!CS_SECURE(context->sc_cs)) {
694		ksiginfo_init_trap(&ksi);
695		ksi.ksi_signo = SIGBUS;
696		ksi.ksi_code = BUS_OBJERR;
697		ksi.ksi_trapno = T_PROTFLT;
698		ksi.ksi_addr = (void *)regs->tf_rip;
699		trapsignal(td, &ksi);
700		return(EINVAL);
701	}
702
703	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
704	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
705
706	/*
707	 * Restore signal context
708	 */
709	regs->tf_gs	= context->sc_gs;
710	regs->tf_fs	= context->sc_fs;
711	regs->tf_es	= context->sc_es;
712	regs->tf_ds	= context->sc_ds;
713	regs->tf_rdi    = context->sc_edi;
714	regs->tf_rsi    = context->sc_esi;
715	regs->tf_rbp    = context->sc_ebp;
716	regs->tf_rbx    = context->sc_ebx;
717	regs->tf_rdx    = context->sc_edx;
718	regs->tf_rcx    = context->sc_ecx;
719	regs->tf_rax    = context->sc_eax;
720	regs->tf_rip    = context->sc_eip;
721	regs->tf_cs     = context->sc_cs;
722	regs->tf_rflags = eflags;
723	regs->tf_rsp    = context->sc_esp_at_signal;
724	regs->tf_ss     = context->sc_ss;
725	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
726
727	/*
728	 * call sigaltstack & ignore results..
729	 */
730	lss = &uc.uc_stack;
731	ss.ss_sp = PTRIN(lss->ss_sp);
732	ss.ss_size = lss->ss_size;
733	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
734
735#ifdef DEBUG
736	if (ldebug(rt_sigreturn))
737		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
738		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
739#endif
740	(void)kern_sigaltstack(td, &ss, NULL);
741
742	return (EJUSTRETURN);
743}
744
745static int
746linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
747{
748	struct proc *p;
749	struct trapframe *frame;
750
751	p = td->td_proc;
752	frame = td->td_frame;
753
754	sa->args[0] = frame->tf_rbx;
755	sa->args[1] = frame->tf_rcx;
756	sa->args[2] = frame->tf_rdx;
757	sa->args[3] = frame->tf_rsi;
758	sa->args[4] = frame->tf_rdi;
759	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
760	sa->code = frame->tf_rax;
761
762	if (sa->code >= p->p_sysent->sv_size)
763		sa->callp = &p->p_sysent->sv_table[0];
764	else
765		sa->callp = &p->p_sysent->sv_table[sa->code];
766	sa->narg = sa->callp->sy_narg;
767
768	td->td_retval[0] = 0;
769	td->td_retval[1] = frame->tf_rdx;
770
771	return (0);
772}
773
774/*
775 * If a linux binary is exec'ing something, try this image activator
776 * first.  We override standard shell script execution in order to
777 * be able to modify the interpreter path.  We only do this if a linux
778 * binary is doing the exec, so we do not create an EXEC module for it.
779 */
780static int	exec_linux_imgact_try(struct image_params *iparams);
781
782static int
783exec_linux_imgact_try(struct image_params *imgp)
784{
785	const char *head = (const char *)imgp->image_header;
786	char *rpath;
787	int error = -1;
788
789	/*
790	* The interpreter for shell scripts run from a linux binary needs
791	* to be located in /compat/linux if possible in order to recursively
792	* maintain linux path emulation.
793	*/
794	if (((const short *)head)[0] == SHELLMAGIC) {
795		/*
796		* Run our normal shell image activator.  If it succeeds attempt
797		* to use the alternate path for the interpreter.  If an
798		* alternate * path is found, use our stringspace to store it.
799		*/
800		if ((error = exec_shell_imgact(imgp)) == 0) {
801			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
802			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
803			    AT_FDCWD);
804			if (rpath != NULL)
805				imgp->args->fname_buf =
806				    imgp->interpreter_name = rpath;
807		}
808	}
809	return (error);
810}
811
812/*
813 * Clear registers on exec
814 * XXX copied from ia32_signal.c.
815 */
816static void
817exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
818{
819	struct trapframe *regs = td->td_frame;
820	struct pcb *pcb = td->td_pcb;
821
822	mtx_lock(&dt_lock);
823	if (td->td_proc->p_md.md_ldt != NULL)
824		user_ldt_free(td);
825	else
826		mtx_unlock(&dt_lock);
827
828	critical_enter();
829	wrmsr(MSR_FSBASE, 0);
830	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
831	pcb->pcb_fsbase = 0;
832	pcb->pcb_gsbase = 0;
833	critical_exit();
834	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
835
836	bzero((char *)regs, sizeof(struct trapframe));
837	regs->tf_rip = imgp->entry_addr;
838	regs->tf_rsp = stack;
839	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
840	regs->tf_gs = _ugssel;
841	regs->tf_fs = _ufssel;
842	regs->tf_es = _udatasel;
843	regs->tf_ds = _udatasel;
844	regs->tf_ss = _udatasel;
845	regs->tf_flags = TF_HASSEGS;
846	regs->tf_cs = _ucode32sel;
847	regs->tf_rbx = imgp->ps_strings;
848
849	fpstate_drop(td);
850
851	/* Do full restore on return so that we can change to a different %cs */
852	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
853	td->td_retval[1] = 0;
854}
855
856/*
857 * XXX copied from ia32_sysvec.c.
858 */
859static register_t *
860linux_copyout_strings(struct image_params *imgp)
861{
862	int argc, envc;
863	u_int32_t *vectp;
864	char *stringp, *destp;
865	u_int32_t *stack_base;
866	struct linux32_ps_strings *arginfo;
867
868	/*
869	 * Calculate string base and vector table pointers.
870	 * Also deal with signal trampoline code for this exec type.
871	 */
872	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
873	destp =	(caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
874	    roundup((ARG_MAX - imgp->args->stringspace),
875	    sizeof(char *));
876
877	/*
878	 * Install LINUX_PLATFORM
879	 */
880	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
881	    linux_szplatform);
882
883	/*
884	 * If we have a valid auxargs ptr, prepare some room
885	 * on the stack.
886	 */
887	if (imgp->auxargs) {
888		/*
889		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
890		 * lower compatibility.
891		 */
892		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
893		    (LINUX_AT_COUNT * 2);
894		/*
895		 * The '+ 2' is for the null pointers at the end of each of
896		 * the arg and env vector sets,and imgp->auxarg_size is room
897		 * for argument of Runtime loader.
898		 */
899		vectp = (u_int32_t *) (destp - (imgp->args->argc +
900		    imgp->args->envc + 2 + imgp->auxarg_size) *
901		    sizeof(u_int32_t));
902
903	} else
904		/*
905		 * The '+ 2' is for the null pointers at the end of each of
906		 * the arg and env vector sets
907		 */
908		vectp = (u_int32_t *)(destp - (imgp->args->argc +
909		    imgp->args->envc + 2) * sizeof(u_int32_t));
910
911	/*
912	 * vectp also becomes our initial stack base
913	 */
914	stack_base = vectp;
915
916	stringp = imgp->args->begin_argv;
917	argc = imgp->args->argc;
918	envc = imgp->args->envc;
919	/*
920	 * Copy out strings - arguments and environment.
921	 */
922	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
923
924	/*
925	 * Fill in "ps_strings" struct for ps, w, etc.
926	 */
927	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
928	suword32(&arginfo->ps_nargvstr, argc);
929
930	/*
931	 * Fill in argument portion of vector table.
932	 */
933	for (; argc > 0; --argc) {
934		suword32(vectp++, (uint32_t)(intptr_t)destp);
935		while (*stringp++ != 0)
936			destp++;
937		destp++;
938	}
939
940	/* a null vector table pointer separates the argp's from the envp's */
941	suword32(vectp++, 0);
942
943	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
944	suword32(&arginfo->ps_nenvstr, envc);
945
946	/*
947	 * Fill in environment portion of vector table.
948	 */
949	for (; envc > 0; --envc) {
950		suword32(vectp++, (uint32_t)(intptr_t)destp);
951		while (*stringp++ != 0)
952			destp++;
953		destp++;
954	}
955
956	/* end of vector table is a null pointer */
957	suword32(vectp, 0);
958
959	return ((register_t *)stack_base);
960}
961
962static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
963    "32-bit Linux emulation");
964
965static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
966SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
967    &linux32_maxdsiz, 0, "");
968static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
969SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
970    &linux32_maxssiz, 0, "");
971static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
972SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
973    &linux32_maxvmem, 0, "");
974
975static void
976linux32_fixlimit(struct rlimit *rl, int which)
977{
978
979	switch (which) {
980	case RLIMIT_DATA:
981		if (linux32_maxdsiz != 0) {
982			if (rl->rlim_cur > linux32_maxdsiz)
983				rl->rlim_cur = linux32_maxdsiz;
984			if (rl->rlim_max > linux32_maxdsiz)
985				rl->rlim_max = linux32_maxdsiz;
986		}
987		break;
988	case RLIMIT_STACK:
989		if (linux32_maxssiz != 0) {
990			if (rl->rlim_cur > linux32_maxssiz)
991				rl->rlim_cur = linux32_maxssiz;
992			if (rl->rlim_max > linux32_maxssiz)
993				rl->rlim_max = linux32_maxssiz;
994		}
995		break;
996	case RLIMIT_VMEM:
997		if (linux32_maxvmem != 0) {
998			if (rl->rlim_cur > linux32_maxvmem)
999				rl->rlim_cur = linux32_maxvmem;
1000			if (rl->rlim_max > linux32_maxvmem)
1001				rl->rlim_max = linux32_maxvmem;
1002		}
1003		break;
1004	}
1005}
1006
1007struct sysentvec elf_linux_sysvec = {
1008	.sv_size	= LINUX_SYS_MAXSYSCALL,
1009	.sv_table	= linux_sysent,
1010	.sv_mask	= 0,
1011	.sv_sigsize	= LINUX_SIGTBLSZ,
1012	.sv_sigtbl	= bsd_to_linux_signal,
1013	.sv_errsize	= ELAST + 1,
1014	.sv_errtbl	= bsd_to_linux_errno,
1015	.sv_transtrap	= translate_traps,
1016	.sv_fixup	= elf_linux_fixup,
1017	.sv_sendsig	= linux_sendsig,
1018	.sv_sigcode	= linux_sigcode,
1019	.sv_szsigcode	= &linux_szsigcode,
1020	.sv_prepsyscall	= NULL,
1021	.sv_name	= "Linux ELF32",
1022	.sv_coredump	= elf32_coredump,
1023	.sv_imgact_try	= exec_linux_imgact_try,
1024	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1025	.sv_pagesize	= PAGE_SIZE,
1026	.sv_minuser	= VM_MIN_ADDRESS,
1027	.sv_maxuser	= LINUX32_MAXUSER,
1028	.sv_usrstack	= LINUX32_USRSTACK,
1029	.sv_psstrings	= LINUX32_PS_STRINGS,
1030	.sv_stackprot	= VM_PROT_ALL,
1031	.sv_copyout_strings = linux_copyout_strings,
1032	.sv_setregs	= exec_linux_setregs,
1033	.sv_fixlimit	= linux32_fixlimit,
1034	.sv_maxssiz	= &linux32_maxssiz,
1035	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1036	.sv_set_syscall_retval = cpu_set_syscall_retval,
1037	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
1038	.sv_syscallnames = NULL,
1039	.sv_shared_page_base = LINUX32_SHAREDPAGE,
1040	.sv_shared_page_len = PAGE_SIZE,
1041	.sv_schedtail	= linux_schedtail,
1042};
1043INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1044
1045static char GNU_ABI_VENDOR[] = "GNU";
1046static int GNULINUX_ABI_DESC = 0;
1047
1048static boolean_t
1049linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1050{
1051	const Elf32_Word *desc;
1052	uintptr_t p;
1053
1054	p = (uintptr_t)(note + 1);
1055	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1056
1057	desc = (const Elf32_Word *)p;
1058	if (desc[0] != GNULINUX_ABI_DESC)
1059		return (FALSE);
1060
1061	/*
1062	 * For linux we encode osrel as follows (see linux_mib.c):
1063	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1064	 */
1065	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1066
1067	return (TRUE);
1068}
1069
1070static Elf_Brandnote linux32_brandnote = {
1071	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1072	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1073	.hdr.n_type	= 1,
1074	.vendor		= GNU_ABI_VENDOR,
1075	.flags		= BN_TRANSLATE_OSREL,
1076	.trans_osrel	= linux32_trans_osrel
1077};
1078
1079static Elf32_Brandinfo linux_brand = {
1080	.brand		= ELFOSABI_LINUX,
1081	.machine	= EM_386,
1082	.compat_3_brand	= "Linux",
1083	.emul_path	= "/compat/linux",
1084	.interp_path	= "/lib/ld-linux.so.1",
1085	.sysvec		= &elf_linux_sysvec,
1086	.interp_newpath	= NULL,
1087	.brand_note	= &linux32_brandnote,
1088	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1089};
1090
1091static Elf32_Brandinfo linux_glibc2brand = {
1092	.brand		= ELFOSABI_LINUX,
1093	.machine	= EM_386,
1094	.compat_3_brand	= "Linux",
1095	.emul_path	= "/compat/linux",
1096	.interp_path	= "/lib/ld-linux.so.2",
1097	.sysvec		= &elf_linux_sysvec,
1098	.interp_newpath	= NULL,
1099	.brand_note	= &linux32_brandnote,
1100	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1101};
1102
1103Elf32_Brandinfo *linux_brandlist[] = {
1104	&linux_brand,
1105	&linux_glibc2brand,
1106	NULL
1107};
1108
1109static int
1110linux_elf_modevent(module_t mod, int type, void *data)
1111{
1112	Elf32_Brandinfo **brandinfo;
1113	int error;
1114	struct linux_ioctl_handler **lihp;
1115	struct linux_device_handler **ldhp;
1116
1117	error = 0;
1118
1119	switch(type) {
1120	case MOD_LOAD:
1121		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122		     ++brandinfo)
1123			if (elf32_insert_brand_entry(*brandinfo) < 0)
1124				error = EINVAL;
1125		if (error == 0) {
1126			SET_FOREACH(lihp, linux_ioctl_handler_set)
1127				linux_ioctl_register_handler(*lihp);
1128			SET_FOREACH(ldhp, linux_device_handler_set)
1129				linux_device_register_handler(*ldhp);
1130			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1131			sx_init(&emul_shared_lock, "emuldata->shared lock");
1132			LIST_INIT(&futex_list);
1133			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1134			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1135			    linux_proc_exit, NULL, 1000);
1136			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1137			    linux_proc_exec, NULL, 1000);
1138			linux_szplatform = roundup(strlen(linux_platform) + 1,
1139			    sizeof(char *));
1140			linux_osd_jail_register();
1141			stclohz = (stathz ? stathz : hz);
1142			if (bootverbose)
1143				printf("Linux ELF exec handler installed\n");
1144		} else
1145			printf("cannot insert Linux ELF brand handler\n");
1146		break;
1147	case MOD_UNLOAD:
1148		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1149		     ++brandinfo)
1150			if (elf32_brand_inuse(*brandinfo))
1151				error = EBUSY;
1152		if (error == 0) {
1153			for (brandinfo = &linux_brandlist[0];
1154			     *brandinfo != NULL; ++brandinfo)
1155				if (elf32_remove_brand_entry(*brandinfo) < 0)
1156					error = EINVAL;
1157		}
1158		if (error == 0) {
1159			SET_FOREACH(lihp, linux_ioctl_handler_set)
1160				linux_ioctl_unregister_handler(*lihp);
1161			SET_FOREACH(ldhp, linux_device_handler_set)
1162				linux_device_unregister_handler(*ldhp);
1163			mtx_destroy(&emul_lock);
1164			sx_destroy(&emul_shared_lock);
1165			mtx_destroy(&futex_mtx);
1166			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1167			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1168			linux_osd_jail_deregister();
1169			if (bootverbose)
1170				printf("Linux ELF exec handler removed\n");
1171		} else
1172			printf("Could not deinstall ELF interpreter entry\n");
1173		break;
1174	default:
1175		return EOPNOTSUPP;
1176	}
1177	return error;
1178}
1179
1180static moduledata_t linux_elf_mod = {
1181	"linuxelf",
1182	linux_elf_modevent,
1183	0
1184};
1185
1186DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1187