Deleted Added
full compact
linux32_sysvec.c (195486) linux32_sysvec.c (196512)
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_sysvec.c 195486 2009-07-09 09:34:11Z kib $");
34__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_sysvec.c 196512 2009-08-24 16:19:47Z bz $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_IA32
38#error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39#endif
40
41#define __ELF_WORD_SIZE 32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_futex.h>
80#include <compat/linux/linux_emul.h>
81#include <compat/linux/linux_mib.h>
82#include <compat/linux/linux_misc.h>
83#include <compat/linux/linux_signal.h>
84#include <compat/linux/linux_util.h>
85
86MODULE_VERSION(linux, 1);
87
88MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
89
90#define AUXARGS_ENTRY_32(pos, id, val) \
91 do { \
92 suword32(pos++, id); \
93 suword32(pos++, val); \
94 } while (0)
95
96#if BYTE_ORDER == LITTLE_ENDIAN
97#define SHELLMAGIC 0x2123 /* #! */
98#else
99#define SHELLMAGIC 0x2321
100#endif
101
102/*
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
106 * ldebug(sigreturn).
107 */
108#define LINUX_SYS_linux_rt_sendsig 0
109#define LINUX_SYS_linux_sendsig 0
110
111const char *linux_platform = "i686";
112static int linux_szplatform;
113extern char linux_sigcode[];
114extern int linux_szsigcode;
115
116extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
117
118SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
119SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
120
121static int elf_linux_fixup(register_t **stack_base,
122 struct image_params *iparams);
123static register_t *linux_copyout_strings(struct image_params *imgp);
124static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
125 caddr_t *params);
126static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
127static void exec_linux_setregs(struct thread *td, u_long entry,
128 u_long stack, u_long ps_strings);
129static void linux32_fixlimit(struct rlimit *rl, int which);
35#include "opt_compat.h"
36
37#ifndef COMPAT_IA32
38#error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39#endif
40
41#define __ELF_WORD_SIZE 32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_futex.h>
80#include <compat/linux/linux_emul.h>
81#include <compat/linux/linux_mib.h>
82#include <compat/linux/linux_misc.h>
83#include <compat/linux/linux_signal.h>
84#include <compat/linux/linux_util.h>
85
86MODULE_VERSION(linux, 1);
87
88MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
89
/*
 * Emit one auxiliary-vector entry as two consecutive 32-bit words: the
 * entry id followed by its value, each written with suword32().
 * 'pos' is post-incremented twice, so it must be a modifiable pointer
 * lvalue; on return it points just past the entry that was written.
 * The results of suword32() are not checked.
 */
90#define AUXARGS_ENTRY_32(pos, id, val) \
91 do { \
92 suword32(pos++, id); \
93 suword32(pos++, val); \
94 } while (0)
95
96#if BYTE_ORDER == LITTLE_ENDIAN
97#define SHELLMAGIC 0x2123 /* #! */
98#else
99#define SHELLMAGIC 0x2321
100#endif
101
102/*
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
106 * ldebug(sigreturn).
107 */
108#define LINUX_SYS_linux_rt_sendsig 0
109#define LINUX_SYS_linux_sendsig 0
110
111const char *linux_platform = "i686";
112static int linux_szplatform;
113extern char linux_sigcode[];
114extern int linux_szsigcode;
115
116extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
117
118SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
119SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
120
121static int elf_linux_fixup(register_t **stack_base,
122 struct image_params *iparams);
123static register_t *linux_copyout_strings(struct image_params *imgp);
124static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
125 caddr_t *params);
126static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
127static void exec_linux_setregs(struct thread *td, u_long entry,
128 u_long stack, u_long ps_strings);
129static void linux32_fixlimit(struct rlimit *rl, int which);
130static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130
131static eventhandler_tag linux_exit_tag;
132static eventhandler_tag linux_schedtail_tag;
133static eventhandler_tag linux_exec_tag;
134
135/*
136 * Linux syscalls return negative errno's, we do positive and map them
137 * Reference:
138 * FreeBSD: src/sys/sys/errno.h
139 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
140 * linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and every
 * entry is already negated, i.e. it holds -(Linux errno).  Several FreeBSD
 * values share one Linux value (for example many entries map to -6).
141 */
142static int bsd_to_linux_errno[ELAST + 1] = {
143 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
144 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
145 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
146 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
147 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
148 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
149 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
150 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
151 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
152 -72, -67, -71
153};
154
/*
 * FreeBSD -> Linux signal number translation.
 * Entry i holds the Linux signal for FreeBSD signal i + 1 (the first entry
 * is the translation of SIGHUP).  A 0 entry means no Linux signal is listed
 * for that FreeBSD signal.
 * NOTE(review): the two 0 slots presumably correspond to SIGEMT and
 * SIGINFO -- confirm against sys/signal.h.
 */
155int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
156 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
157 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
158 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
159 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
160 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
161 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
162 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
163 0, LINUX_SIGUSR1, LINUX_SIGUSR2
164};
165
/*
 * Linux -> FreeBSD signal number translation.
 * Entry i holds the FreeBSD signal for Linux signal i + 1 (the first entry
 * is SIGHUP).  The mapping is not one-to-one: SIGBUS and SIGURG each appear
 * twice.
 * NOTE(review): the repeated entries presumably stand in for Linux signals
 * without a FreeBSD equivalent (e.g. SIGSTKFLT, SIGPWR) -- confirm against
 * the LINUX_SIG* definitions.
 */
166int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
167 SIGHUP, SIGINT, SIGQUIT, SIGILL,
168 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
169 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
170 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
171 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
172 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
173 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
174 SIGIO, SIGURG, SIGSYS
175};
176
/*
 * FreeBSD -> Linux trap number translation, used to fill sc_trapno in the
 * signal context handed to Linux signal handlers.
 * The table is indexed by the FreeBSD trap code (the T_* name is given in
 * the comment on each row); rows without a T_* name have no translation and
 * yield LINUX_T_UNKNOWN.
 */
177#define LINUX_T_UNKNOWN 255
178static int _bsd_to_linux_trapcode[] = {
179 LINUX_T_UNKNOWN, /* 0 */
180 6, /* 1 T_PRIVINFLT */
181 LINUX_T_UNKNOWN, /* 2 */
182 3, /* 3 T_BPTFLT */
183 LINUX_T_UNKNOWN, /* 4 */
184 LINUX_T_UNKNOWN, /* 5 */
185 16, /* 6 T_ARITHTRAP */
186 254, /* 7 T_ASTFLT */
187 LINUX_T_UNKNOWN, /* 8 */
188 13, /* 9 T_PROTFLT */
189 1, /* 10 T_TRCTRAP */
190 LINUX_T_UNKNOWN, /* 11 */
191 14, /* 12 T_PAGEFLT */
192 LINUX_T_UNKNOWN, /* 13 */
193 17, /* 14 T_ALIGNFLT */
194 LINUX_T_UNKNOWN, /* 15 */
195 LINUX_T_UNKNOWN, /* 16 */
196 LINUX_T_UNKNOWN, /* 17 */
197 0, /* 18 T_DIVIDE */
198 2, /* 19 T_NMI */
199 4, /* 20 T_OFLOW */
200 5, /* 21 T_BOUND */
201 7, /* 22 T_DNA */
202 8, /* 23 T_DOUBLEFLT */
203 9, /* 24 T_FPOPFLT */
204 10, /* 25 T_TSSFLT */
205 11, /* 26 T_SEGNPFLT */
206 12, /* 27 T_STKFLT */
207 18, /* 28 T_MCHK */
208 19, /* 29 T_XMMFLT */
209 15 /* 30 T_RESERVED */
210};
/*
 * Bounds-checked lookup in the table above: codes at or beyond the number
 * of table entries evaluate to LINUX_T_UNKNOWN.  'code' is evaluated twice
 * when it is in range, so it must not have side effects.
 */
211#define bsd_to_linux_trapcode(code) \
212 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
213 _bsd_to_linux_trapcode[(code)]: \
214 LINUX_T_UNKNOWN)
215
/*
 * 32-bit layout of the argument/environment descriptor that this file
 * addresses at LINUX32_PS_STRINGS in the emulated process.  The string
 * pointers are kept as 32-bit integers rather than native pointers.
 */
216struct linux32_ps_strings {
217 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
218 u_int ps_nargvstr; /* the number of argument strings */
219 u_int32_t ps_envstr; /* first of 0 or more environment strings */
220 u_int ps_nenvstr; /* the number of environment strings */
221};
222
223/*
224 * If FreeBSD & Linux have a difference of opinion about what a trap
225 * means, deal with it here.
226 *
227 * MPSAFE
228 */
229static int
230translate_traps(int signal, int trap_code)
231{
232 if (signal != SIGBUS)
233 return signal;
234 switch (trap_code) {
235 case T_PROTFLT:
236 case T_TSSFLT:
237 case T_DOUBLEFLT:
238 case T_PAGEFLT:
239 return SIGSEGV;
240 default:
241 return signal;
242 }
243}
244
245static int
246elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247{
248 Elf32_Auxargs *args;
249 Elf32_Addr *base;
250 Elf32_Addr *pos, *uplatform;
251 struct linux32_ps_strings *arginfo;
252
253 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 linux_szplatform);
256
257 KASSERT(curthread->td_proc == imgp->proc,
258 ("unsafe elf_linux_fixup(), should be curproc"));
259 base = (Elf32_Addr *)*stack_base;
260 args = (Elf32_Auxargs *)imgp->auxargs;
261 pos = base + (imgp->args->argc + imgp->args->envc + 2);
262
263 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
264
265 /*
266 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
267 * as it has appeared in the 2.4.0-rc7 first time.
268 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
269 * glibc falls back to the hard-coded CLK_TCK value when aux entry
270 * is not present.
271 * Also see linux_times() implementation.
272 */
273 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
274 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
275 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
276 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
277 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
278 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
279 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
280 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
281 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
282 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
283 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
284 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
285 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
286 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
287 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
288 if (args->execfd != -1)
289 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
290 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
291
292 free(imgp->auxargs, M_TEMP);
293 imgp->auxargs = NULL;
294
295 base--;
296 suword32(base, (uint32_t)imgp->args->argc);
297 *stack_base = (register_t *)base;
298 return 0;
299}
300
301extern unsigned long linux_sznonrtsigcode;
302
303static void
304linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
305{
306 struct thread *td = curthread;
307 struct proc *p = td->td_proc;
308 struct sigacts *psp;
309 struct trapframe *regs;
310 struct l_rt_sigframe *fp, frame;
311 int oonstack;
312 int sig;
313 int code;
314
315 sig = ksi->ksi_signo;
316 code = ksi->ksi_code;
317 PROC_LOCK_ASSERT(p, MA_OWNED);
318 psp = p->p_sigacts;
319 mtx_assert(&psp->ps_mtx, MA_OWNED);
320 regs = td->td_frame;
321 oonstack = sigonstack(regs->tf_rsp);
322
323#ifdef DEBUG
324 if (ldebug(rt_sendsig))
325 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
326 catcher, sig, (void*)mask, code);
327#endif
328 /*
329 * Allocate space for the signal handler context.
330 */
331 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
332 SIGISMEMBER(psp->ps_sigonstack, sig)) {
333 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
334 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
335 } else
336 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
337 mtx_unlock(&psp->ps_mtx);
338
339 /*
340 * Build the argument list for the signal handler.
341 */
342 if (p->p_sysent->sv_sigtbl)
343 if (sig <= p->p_sysent->sv_sigsize)
344 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
345
346 bzero(&frame, sizeof(frame));
347
348 frame.sf_handler = PTROUT(catcher);
349 frame.sf_sig = sig;
350 frame.sf_siginfo = PTROUT(&fp->sf_si);
351 frame.sf_ucontext = PTROUT(&fp->sf_sc);
352
353 /* Fill in POSIX parts */
354 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
355
356 /*
357 * Build the signal context to be used by sigreturn.
358 */
359 frame.sf_sc.uc_flags = 0; /* XXX ??? */
360 frame.sf_sc.uc_link = 0; /* XXX ??? */
361
362 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
363 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
364 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
365 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
366 PROC_UNLOCK(p);
367
368 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
369
370 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
371 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
372 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
373 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
374 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
375 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
376 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
377 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
378 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
379 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
380 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
381 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
382 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
383 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
384 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
385 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
386 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
387 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
388 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
389 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
390
391#ifdef DEBUG
392 if (ldebug(rt_sendsig))
393 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
394 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
395 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
396#endif
397
398 if (copyout(&frame, fp, sizeof(frame)) != 0) {
399 /*
400 * Process has trashed its stack; give it an illegal
401 * instruction to halt it in its tracks.
402 */
403#ifdef DEBUG
404 if (ldebug(rt_sendsig))
405 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
406 fp, oonstack);
407#endif
408 PROC_LOCK(p);
409 sigexit(td, SIGILL);
410 }
411
412 /*
413 * Build context to run handler in.
414 */
415 regs->tf_rsp = PTROUT(fp);
416 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
417 linux_sznonrtsigcode;
418 regs->tf_rflags &= ~(PSL_T | PSL_D);
419 regs->tf_cs = _ucode32sel;
420 regs->tf_ss = _udatasel;
421 regs->tf_ds = _udatasel;
422 regs->tf_es = _udatasel;
423 regs->tf_fs = _ufssel;
424 regs->tf_gs = _ugssel;
425 regs->tf_flags = TF_HASSEGS;
426 td->td_pcb->pcb_full_iret = 1;
427 PROC_LOCK(p);
428 mtx_lock(&psp->ps_mtx);
429}
430
431
432/*
433 * Send an interrupt to process.
434 *
435 * Stack is set up to allow sigcode stored
436 * in u. to call routine, followed by kcall
437 * to sigreturn routine below. After sigreturn
438 * resets the signal mask, the stack, and the
439 * frame pointer, it returns to the user
440 * specified pc, psl.
441 */
442static void
443linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
444{
445 struct thread *td = curthread;
446 struct proc *p = td->td_proc;
447 struct sigacts *psp;
448 struct trapframe *regs;
449 struct l_sigframe *fp, frame;
450 l_sigset_t lmask;
451 int oonstack, i;
452 int sig, code;
453
454 sig = ksi->ksi_signo;
455 code = ksi->ksi_code;
456 PROC_LOCK_ASSERT(p, MA_OWNED);
457 psp = p->p_sigacts;
458 mtx_assert(&psp->ps_mtx, MA_OWNED);
459 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
460 /* Signal handler installed with SA_SIGINFO. */
461 linux_rt_sendsig(catcher, ksi, mask);
462 return;
463 }
464
465 regs = td->td_frame;
466 oonstack = sigonstack(regs->tf_rsp);
467
468#ifdef DEBUG
469 if (ldebug(sendsig))
470 printf(ARGS(sendsig, "%p, %d, %p, %u"),
471 catcher, sig, (void*)mask, code);
472#endif
473
474 /*
475 * Allocate space for the signal handler context.
476 */
477 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
478 SIGISMEMBER(psp->ps_sigonstack, sig)) {
479 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
480 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
481 } else
482 fp = (struct l_sigframe *)regs->tf_rsp - 1;
483 mtx_unlock(&psp->ps_mtx);
484 PROC_UNLOCK(p);
485
486 /*
487 * Build the argument list for the signal handler.
488 */
489 if (p->p_sysent->sv_sigtbl)
490 if (sig <= p->p_sysent->sv_sigsize)
491 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
492
493 bzero(&frame, sizeof(frame));
494
495 frame.sf_handler = PTROUT(catcher);
496 frame.sf_sig = sig;
497
498 bsd_to_linux_sigset(mask, &lmask);
499
500 /*
501 * Build the signal context to be used by sigreturn.
502 */
503 frame.sf_sc.sc_mask = lmask.__bits[0];
504 frame.sf_sc.sc_gs = regs->tf_gs;
505 frame.sf_sc.sc_fs = regs->tf_fs;
506 frame.sf_sc.sc_es = regs->tf_es;
507 frame.sf_sc.sc_ds = regs->tf_ds;
508 frame.sf_sc.sc_edi = regs->tf_rdi;
509 frame.sf_sc.sc_esi = regs->tf_rsi;
510 frame.sf_sc.sc_ebp = regs->tf_rbp;
511 frame.sf_sc.sc_ebx = regs->tf_rbx;
512 frame.sf_sc.sc_edx = regs->tf_rdx;
513 frame.sf_sc.sc_ecx = regs->tf_rcx;
514 frame.sf_sc.sc_eax = regs->tf_rax;
515 frame.sf_sc.sc_eip = regs->tf_rip;
516 frame.sf_sc.sc_cs = regs->tf_cs;
517 frame.sf_sc.sc_eflags = regs->tf_rflags;
518 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
519 frame.sf_sc.sc_ss = regs->tf_ss;
520 frame.sf_sc.sc_err = regs->tf_err;
521 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
522 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
523
524 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
525 frame.sf_extramask[i] = lmask.__bits[i+1];
526
527 if (copyout(&frame, fp, sizeof(frame)) != 0) {
528 /*
529 * Process has trashed its stack; give it an illegal
530 * instruction to halt it in its tracks.
531 */
532 PROC_LOCK(p);
533 sigexit(td, SIGILL);
534 }
535
536 /*
537 * Build context to run handler in.
538 */
539 regs->tf_rsp = PTROUT(fp);
540 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
541 regs->tf_rflags &= ~(PSL_T | PSL_D);
542 regs->tf_cs = _ucode32sel;
543 regs->tf_ss = _udatasel;
544 regs->tf_ds = _udatasel;
545 regs->tf_es = _udatasel;
546 regs->tf_fs = _ufssel;
547 regs->tf_gs = _ugssel;
548 regs->tf_flags = TF_HASSEGS;
549 td->td_pcb->pcb_full_iret = 1;
550 PROC_LOCK(p);
551 mtx_lock(&psp->ps_mtx);
552}
553
554/*
555 * System call to cleanup state after a signal
556 * has been taken. Reset signal mask and
557 * stack state from context left by sendsig (above).
558 * Return to previous pc and psl as specified by
559 * context left by sendsig. Check carefully to
560 * make sure that the user has not modified the
561 * psl to gain improper privileges or to cause
562 * a machine fault.
563 */
564int
565linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
566{
567 struct proc *p = td->td_proc;
568 struct l_sigframe frame;
569 struct trapframe *regs;
570 l_sigset_t lmask;
571 int eflags, i;
572 ksiginfo_t ksi;
573
574 regs = td->td_frame;
575
576#ifdef DEBUG
577 if (ldebug(sigreturn))
578 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
579#endif
580 /*
581 * The trampoline code hands us the sigframe.
582 * It is unsafe to keep track of it ourselves, in the event that a
583 * program jumps out of a signal handler.
584 */
585 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
586 return (EFAULT);
587
588 /*
589 * Check for security violations.
590 */
591#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
592 eflags = frame.sf_sc.sc_eflags;
593 /*
594 * XXX do allow users to change the privileged flag PSL_RF. The
595 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
596 * sometimes set it there too. tf_eflags is kept in the signal
597 * context during signal handling and there is no other place
598 * to remember it, so the PSL_RF bit may be corrupted by the
599 * signal handler without us knowing. Corruption of the PSL_RF
600 * bit at worst causes one more or one less debugger trap, so
601 * allowing it is fairly harmless.
602 */
603 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
604 return(EINVAL);
605
606 /*
607 * Don't allow users to load a valid privileged %cs. Let the
608 * hardware check for invalid selectors, excess privilege in
609 * other selectors, invalid %eip's and invalid %esp's.
610 */
611#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
612 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
613 ksiginfo_init_trap(&ksi);
614 ksi.ksi_signo = SIGBUS;
615 ksi.ksi_code = BUS_OBJERR;
616 ksi.ksi_trapno = T_PROTFLT;
617 ksi.ksi_addr = (void *)regs->tf_rip;
618 trapsignal(td, &ksi);
619 return(EINVAL);
620 }
621
622 lmask.__bits[0] = frame.sf_sc.sc_mask;
623 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
624 lmask.__bits[i+1] = frame.sf_extramask[i];
625 PROC_LOCK(p);
626 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
627 SIG_CANTMASK(td->td_sigmask);
628 signotify(td);
629 PROC_UNLOCK(p);
630
631 /*
632 * Restore signal context.
633 */
634 regs->tf_rdi = frame.sf_sc.sc_edi;
635 regs->tf_rsi = frame.sf_sc.sc_esi;
636 regs->tf_rbp = frame.sf_sc.sc_ebp;
637 regs->tf_rbx = frame.sf_sc.sc_ebx;
638 regs->tf_rdx = frame.sf_sc.sc_edx;
639 regs->tf_rcx = frame.sf_sc.sc_ecx;
640 regs->tf_rax = frame.sf_sc.sc_eax;
641 regs->tf_rip = frame.sf_sc.sc_eip;
642 regs->tf_cs = frame.sf_sc.sc_cs;
643 regs->tf_ds = frame.sf_sc.sc_ds;
644 regs->tf_es = frame.sf_sc.sc_es;
645 regs->tf_fs = frame.sf_sc.sc_fs;
646 regs->tf_gs = frame.sf_sc.sc_gs;
647 regs->tf_rflags = eflags;
648 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
649 regs->tf_ss = frame.sf_sc.sc_ss;
650 td->td_pcb->pcb_full_iret = 1;
651
652 return (EJUSTRETURN);
653}
654
655/*
656 * System call to cleanup state after a signal
657 * has been taken. Reset signal mask and
658 * stack state from context left by rt_sendsig (above).
659 * Return to previous pc and psl as specified by
660 * context left by sendsig. Check carefully to
661 * make sure that the user has not modified the
662 * psl to gain improper privileges or to cause
663 * a machine fault.
664 */
665int
666linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
667{
668 struct proc *p = td->td_proc;
669 struct l_ucontext uc;
670 struct l_sigcontext *context;
671 l_stack_t *lss;
672 stack_t ss;
673 struct trapframe *regs;
674 int eflags;
675 ksiginfo_t ksi;
676
677 regs = td->td_frame;
678
679#ifdef DEBUG
680 if (ldebug(rt_sigreturn))
681 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
682#endif
683 /*
684 * The trampoline code hands us the ucontext.
685 * It is unsafe to keep track of it ourselves, in the event that a
686 * program jumps out of a signal handler.
687 */
688 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
689 return (EFAULT);
690
691 context = &uc.uc_mcontext;
692
693 /*
694 * Check for security violations.
695 */
696#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
697 eflags = context->sc_eflags;
698 /*
699 * XXX do allow users to change the privileged flag PSL_RF. The
700 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
701 * sometimes set it there too. tf_eflags is kept in the signal
702 * context during signal handling and there is no other place
703 * to remember it, so the PSL_RF bit may be corrupted by the
704 * signal handler without us knowing. Corruption of the PSL_RF
705 * bit at worst causes one more or one less debugger trap, so
706 * allowing it is fairly harmless.
707 */
708 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
709 return(EINVAL);
710
711 /*
712 * Don't allow users to load a valid privileged %cs. Let the
713 * hardware check for invalid selectors, excess privilege in
714 * other selectors, invalid %eip's and invalid %esp's.
715 */
716#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
717 if (!CS_SECURE(context->sc_cs)) {
718 ksiginfo_init_trap(&ksi);
719 ksi.ksi_signo = SIGBUS;
720 ksi.ksi_code = BUS_OBJERR;
721 ksi.ksi_trapno = T_PROTFLT;
722 ksi.ksi_addr = (void *)regs->tf_rip;
723 trapsignal(td, &ksi);
724 return(EINVAL);
725 }
726
727 PROC_LOCK(p);
728 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
729 SIG_CANTMASK(td->td_sigmask);
730 signotify(td);
731 PROC_UNLOCK(p);
732
733 /*
734 * Restore signal context
735 */
736 regs->tf_gs = context->sc_gs;
737 regs->tf_fs = context->sc_fs;
738 regs->tf_es = context->sc_es;
739 regs->tf_ds = context->sc_ds;
740 regs->tf_rdi = context->sc_edi;
741 regs->tf_rsi = context->sc_esi;
742 regs->tf_rbp = context->sc_ebp;
743 regs->tf_rbx = context->sc_ebx;
744 regs->tf_rdx = context->sc_edx;
745 regs->tf_rcx = context->sc_ecx;
746 regs->tf_rax = context->sc_eax;
747 regs->tf_rip = context->sc_eip;
748 regs->tf_cs = context->sc_cs;
749 regs->tf_rflags = eflags;
750 regs->tf_rsp = context->sc_esp_at_signal;
751 regs->tf_ss = context->sc_ss;
752 td->td_pcb->pcb_full_iret = 1;
753
754 /*
755 * call sigaltstack & ignore results..
756 */
757 lss = &uc.uc_stack;
758 ss.ss_sp = PTRIN(lss->ss_sp);
759 ss.ss_size = lss->ss_size;
760 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
761
762#ifdef DEBUG
763 if (ldebug(rt_sigreturn))
764 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
765 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
766#endif
767 (void)kern_sigaltstack(td, &ss, NULL);
768
769 return (EJUSTRETURN);
770}
771
772/*
773 * MPSAFE
774 */
775static void
776linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
777{
778 args[0] = tf->tf_rbx;
779 args[1] = tf->tf_rcx;
780 args[2] = tf->tf_rdx;
781 args[3] = tf->tf_rsi;
782 args[4] = tf->tf_rdi;
783 args[5] = tf->tf_rbp; /* Unconfirmed */
784 *params = NULL; /* no copyin */
785}
786
787/*
788 * If a linux binary is exec'ing something, try this image activator
789 * first. We override standard shell script execution in order to
790 * be able to modify the interpreter path. We only do this if a linux
791 * binary is doing the exec, so we do not create an EXEC module for it.
792 */
793static int exec_linux_imgact_try(struct image_params *iparams);
794
795static int
796exec_linux_imgact_try(struct image_params *imgp)
797{
798 const char *head = (const char *)imgp->image_header;
799 char *rpath;
800 int error = -1, len;
801
802 /*
803 * The interpreter for shell scripts run from a linux binary needs
804 * to be located in /compat/linux if possible in order to recursively
805 * maintain linux path emulation.
806 */
807 if (((const short *)head)[0] == SHELLMAGIC) {
808 /*
809 * Run our normal shell image activator. If it succeeds attempt
810 * to use the alternate path for the interpreter. If an
811 * alternate * path is found, use our stringspace to store it.
812 */
813 if ((error = exec_shell_imgact(imgp)) == 0) {
814 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
815 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
816 AT_FDCWD);
817 if (rpath != NULL) {
818 len = strlen(rpath) + 1;
819
820 if (len <= MAXSHELLCMDLEN) {
821 memcpy(imgp->interpreter_name, rpath,
822 len);
823 }
824 free(rpath, M_TEMP);
825 }
826 }
827 }
828 return(error);
829}
830
831/*
832 * Clear registers on exec
833 * XXX copied from ia32_signal.c.
834 */
835static void
836exec_linux_setregs(td, entry, stack, ps_strings)
837 struct thread *td;
838 u_long entry;
839 u_long stack;
840 u_long ps_strings;
841{
842 struct trapframe *regs = td->td_frame;
843 struct pcb *pcb = td->td_pcb;
844
845 mtx_lock(&dt_lock);
846 if (td->td_proc->p_md.md_ldt != NULL)
847 user_ldt_free(td);
848 else
849 mtx_unlock(&dt_lock);
850
851 critical_enter();
852 wrmsr(MSR_FSBASE, 0);
853 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
854 pcb->pcb_fsbase = 0;
855 pcb->pcb_gsbase = 0;
856 critical_exit();
857 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
858
859 bzero((char *)regs, sizeof(struct trapframe));
860 regs->tf_rip = entry;
861 regs->tf_rsp = stack;
862 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
863 regs->tf_gs = _ugssel;
864 regs->tf_fs = _ufssel;
865 regs->tf_es = _udatasel;
866 regs->tf_ds = _udatasel;
867 regs->tf_ss = _udatasel;
868 regs->tf_flags = TF_HASSEGS;
869 regs->tf_cs = _ucode32sel;
870 regs->tf_rbx = ps_strings;
871 td->td_pcb->pcb_full_iret = 1;
872 load_cr0(rcr0() | CR0_MP | CR0_TS);
873 fpstate_drop(td);
874
875 /* Return via doreti so that we can change to a different %cs */
876 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
877 pcb->pcb_flags &= ~PCB_GS32BIT;
878 td->td_retval[1] = 0;
879}
880
881/*
882 * XXX copied from ia32_sysvec.c.
883 */
884static register_t *
885linux_copyout_strings(struct image_params *imgp)
886{
887 int argc, envc;
888 u_int32_t *vectp;
889 char *stringp, *destp;
890 u_int32_t *stack_base;
891 struct linux32_ps_strings *arginfo;
892
893 /*
894 * Calculate string base and vector table pointers.
895 * Also deal with signal trampoline code for this exec type.
896 */
897 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
898 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
899 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
900 sizeof(char *));
901
902 /*
903 * install sigcode
904 */
905 copyout(imgp->proc->p_sysent->sv_sigcode,
906 ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
907
908 /*
909 * Install LINUX_PLATFORM
910 */
911 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
912 linux_szplatform), linux_szplatform);
913
914 /*
915 * If we have a valid auxargs ptr, prepare some room
916 * on the stack.
917 */
918 if (imgp->auxargs) {
919 /*
920 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
921 * lower compatibility.
922 */
923 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
924 (LINUX_AT_COUNT * 2);
925 /*
926 * The '+ 2' is for the null pointers at the end of each of
927 * the arg and env vector sets,and imgp->auxarg_size is room
928 * for argument of Runtime loader.
929 */
930 vectp = (u_int32_t *) (destp - (imgp->args->argc +
931 imgp->args->envc + 2 + imgp->auxarg_size) *
932 sizeof(u_int32_t));
933
934 } else
935 /*
936 * The '+ 2' is for the null pointers at the end of each of
937 * the arg and env vector sets
938 */
939 vectp = (u_int32_t *)(destp - (imgp->args->argc +
940 imgp->args->envc + 2) * sizeof(u_int32_t));
941
942 /*
943 * vectp also becomes our initial stack base
944 */
945 stack_base = vectp;
946
947 stringp = imgp->args->begin_argv;
948 argc = imgp->args->argc;
949 envc = imgp->args->envc;
950 /*
951 * Copy out strings - arguments and environment.
952 */
953 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
954
955 /*
956 * Fill in "ps_strings" struct for ps, w, etc.
957 */
958 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
959 suword32(&arginfo->ps_nargvstr, argc);
960
961 /*
962 * Fill in argument portion of vector table.
963 */
964 for (; argc > 0; --argc) {
965 suword32(vectp++, (uint32_t)(intptr_t)destp);
966 while (*stringp++ != 0)
967 destp++;
968 destp++;
969 }
970
971 /* a null vector table pointer separates the argp's from the envp's */
972 suword32(vectp++, 0);
973
974 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
975 suword32(&arginfo->ps_nenvstr, envc);
976
977 /*
978 * Fill in environment portion of vector table.
979 */
980 for (; envc > 0; --envc) {
981 suword32(vectp++, (uint32_t)(intptr_t)destp);
982 while (*stringp++ != 0)
983 destp++;
984 destp++;
985 }
986
987 /* end of vector table is a null pointer */
988 suword32(vectp, 0);
989
990 return ((register_t *)stack_base);
991}
992
993SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
994 "32-bit Linux emulation");
995
996static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
997SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
998 &linux32_maxdsiz, 0, "");
999static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
1000SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
1001 &linux32_maxssiz, 0, "");
1002static u_long linux32_maxvmem = LINUX32_MAXVMEM;
1003SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
1004 &linux32_maxvmem, 0, "");
1005
1006static void
1007linux32_fixlimit(struct rlimit *rl, int which)
1008{
1009
1010 switch (which) {
1011 case RLIMIT_DATA:
1012 if (linux32_maxdsiz != 0) {
1013 if (rl->rlim_cur > linux32_maxdsiz)
1014 rl->rlim_cur = linux32_maxdsiz;
1015 if (rl->rlim_max > linux32_maxdsiz)
1016 rl->rlim_max = linux32_maxdsiz;
1017 }
1018 break;
1019 case RLIMIT_STACK:
1020 if (linux32_maxssiz != 0) {
1021 if (rl->rlim_cur > linux32_maxssiz)
1022 rl->rlim_cur = linux32_maxssiz;
1023 if (rl->rlim_max > linux32_maxssiz)
1024 rl->rlim_max = linux32_maxssiz;
1025 }
1026 break;
1027 case RLIMIT_VMEM:
1028 if (linux32_maxvmem != 0) {
1029 if (rl->rlim_cur > linux32_maxvmem)
1030 rl->rlim_cur = linux32_maxvmem;
1031 if (rl->rlim_max > linux32_maxvmem)
1032 rl->rlim_max = linux32_maxvmem;
1033 }
1034 break;
1035 }
1036}
1037
1038struct sysentvec elf_linux_sysvec = {
1039 .sv_size = LINUX_SYS_MAXSYSCALL,
1040 .sv_table = linux_sysent,
1041 .sv_mask = 0,
1042 .sv_sigsize = LINUX_SIGTBLSZ,
1043 .sv_sigtbl = bsd_to_linux_signal,
1044 .sv_errsize = ELAST + 1,
1045 .sv_errtbl = bsd_to_linux_errno,
1046 .sv_transtrap = translate_traps,
1047 .sv_fixup = elf_linux_fixup,
1048 .sv_sendsig = linux_sendsig,
1049 .sv_sigcode = linux_sigcode,
1050 .sv_szsigcode = &linux_szsigcode,
1051 .sv_prepsyscall = linux_prepsyscall,
1052 .sv_name = "Linux ELF32",
1053 .sv_coredump = elf32_coredump,
1054 .sv_imgact_try = exec_linux_imgact_try,
1055 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1056 .sv_pagesize = PAGE_SIZE,
1057 .sv_minuser = VM_MIN_ADDRESS,
1058 .sv_maxuser = LINUX32_USRSTACK,
1059 .sv_usrstack = LINUX32_USRSTACK,
1060 .sv_psstrings = LINUX32_PS_STRINGS,
1061 .sv_stackprot = VM_PROT_ALL,
1062 .sv_copyout_strings = linux_copyout_strings,
1063 .sv_setregs = exec_linux_setregs,
1064 .sv_fixlimit = linux32_fixlimit,
1065 .sv_maxssiz = &linux32_maxssiz,
1066 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32
1067};
1068
131
132static eventhandler_tag linux_exit_tag;
133static eventhandler_tag linux_schedtail_tag;
134static eventhandler_tag linux_exec_tag;
135
136/*
137 * Linux syscalls return negative errno's, we do positive and map them
138 * Reference:
139 * FreeBSD: src/sys/sys/errno.h
140 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
141 * linux-2.6.17.8/include/asm-generic/errno.h
142 */
143static int bsd_to_linux_errno[ELAST + 1] = {
144 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
145 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
146 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
147 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
148 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
149 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
150 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
151 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
152 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
153 -72, -67, -71
154};
155
156int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
157 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
158 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
159 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
160 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
161 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
162 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
163 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
164 0, LINUX_SIGUSR1, LINUX_SIGUSR2
165};
166
167int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
168 SIGHUP, SIGINT, SIGQUIT, SIGILL,
169 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
170 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
171 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
172 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
173 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
174 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
175 SIGIO, SIGURG, SIGSYS
176};
177
/* Value reported for trap codes that have no entry in the table below. */
#define	LINUX_T_UNKNOWN	255

/*
 * Trap number reported to Linux programs, indexed by the FreeBSD trap
 * code named in each row's comment.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code into the value stored in a Linux
 * sigcontext's sc_trapno.
 *
 * Returns the table entry for 'code', or LINUX_T_UNKNOWN when 'code' is
 * negative or past the end of the table.  This is a function rather than
 * a macro so the argument is evaluated once and the range check does not
 * depend on an implicit signed-to-unsigned conversion.
 */
static __inline int
bsd_to_linux_trapcode(int code)
{
	const size_t nentries = sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]);

	if (code < 0 || (size_t)code >= nentries)
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
216
217struct linux32_ps_strings {
218 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
219 u_int ps_nargvstr; /* the number of argument strings */
220 u_int32_t ps_envstr; /* first of 0 or more environment strings */
221 u_int ps_nenvstr; /* the number of environment strings */
222};
223
224/*
225 * If FreeBSD & Linux have a difference of opinion about what a trap
226 * means, deal with it here.
227 *
228 * MPSAFE
229 */
230static int
231translate_traps(int signal, int trap_code)
232{
233 if (signal != SIGBUS)
234 return signal;
235 switch (trap_code) {
236 case T_PROTFLT:
237 case T_TSSFLT:
238 case T_DOUBLEFLT:
239 case T_PAGEFLT:
240 return SIGSEGV;
241 default:
242 return signal;
243 }
244}
245
246static int
247elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
248{
249 Elf32_Auxargs *args;
250 Elf32_Addr *base;
251 Elf32_Addr *pos, *uplatform;
252 struct linux32_ps_strings *arginfo;
253
254 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
255 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
256 linux_szplatform);
257
258 KASSERT(curthread->td_proc == imgp->proc,
259 ("unsafe elf_linux_fixup(), should be curproc"));
260 base = (Elf32_Addr *)*stack_base;
261 args = (Elf32_Auxargs *)imgp->auxargs;
262 pos = base + (imgp->args->argc + imgp->args->envc + 2);
263
264 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
265
266 /*
267 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
268 * as it has appeared in the 2.4.0-rc7 first time.
269 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
270 * glibc falls back to the hard-coded CLK_TCK value when aux entry
271 * is not present.
272 * Also see linux_times() implementation.
273 */
274 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
275 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
276 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
277 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
278 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
279 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
280 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
281 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
282 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
283 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
284 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
285 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
286 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
287 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
288 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
289 if (args->execfd != -1)
290 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
291 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
292
293 free(imgp->auxargs, M_TEMP);
294 imgp->auxargs = NULL;
295
296 base--;
297 suword32(base, (uint32_t)imgp->args->argc);
298 *stack_base = (register_t *)base;
299 return 0;
300}
301
302extern unsigned long linux_sznonrtsigcode;
303
304static void
305linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
306{
307 struct thread *td = curthread;
308 struct proc *p = td->td_proc;
309 struct sigacts *psp;
310 struct trapframe *regs;
311 struct l_rt_sigframe *fp, frame;
312 int oonstack;
313 int sig;
314 int code;
315
316 sig = ksi->ksi_signo;
317 code = ksi->ksi_code;
318 PROC_LOCK_ASSERT(p, MA_OWNED);
319 psp = p->p_sigacts;
320 mtx_assert(&psp->ps_mtx, MA_OWNED);
321 regs = td->td_frame;
322 oonstack = sigonstack(regs->tf_rsp);
323
324#ifdef DEBUG
325 if (ldebug(rt_sendsig))
326 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
327 catcher, sig, (void*)mask, code);
328#endif
329 /*
330 * Allocate space for the signal handler context.
331 */
332 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
333 SIGISMEMBER(psp->ps_sigonstack, sig)) {
334 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
335 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
336 } else
337 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
338 mtx_unlock(&psp->ps_mtx);
339
340 /*
341 * Build the argument list for the signal handler.
342 */
343 if (p->p_sysent->sv_sigtbl)
344 if (sig <= p->p_sysent->sv_sigsize)
345 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
346
347 bzero(&frame, sizeof(frame));
348
349 frame.sf_handler = PTROUT(catcher);
350 frame.sf_sig = sig;
351 frame.sf_siginfo = PTROUT(&fp->sf_si);
352 frame.sf_ucontext = PTROUT(&fp->sf_sc);
353
354 /* Fill in POSIX parts */
355 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
356
357 /*
358 * Build the signal context to be used by sigreturn.
359 */
360 frame.sf_sc.uc_flags = 0; /* XXX ??? */
361 frame.sf_sc.uc_link = 0; /* XXX ??? */
362
363 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
364 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
365 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
366 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
367 PROC_UNLOCK(p);
368
369 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
370
371 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
372 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
373 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
374 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
375 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
376 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
377 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
378 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
379 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
380 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
381 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
382 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
383 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
384 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
385 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
386 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
387 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
388 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
389 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
390 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
391
392#ifdef DEBUG
393 if (ldebug(rt_sendsig))
394 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
395 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
396 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
397#endif
398
399 if (copyout(&frame, fp, sizeof(frame)) != 0) {
400 /*
401 * Process has trashed its stack; give it an illegal
402 * instruction to halt it in its tracks.
403 */
404#ifdef DEBUG
405 if (ldebug(rt_sendsig))
406 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
407 fp, oonstack);
408#endif
409 PROC_LOCK(p);
410 sigexit(td, SIGILL);
411 }
412
413 /*
414 * Build context to run handler in.
415 */
416 regs->tf_rsp = PTROUT(fp);
417 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
418 linux_sznonrtsigcode;
419 regs->tf_rflags &= ~(PSL_T | PSL_D);
420 regs->tf_cs = _ucode32sel;
421 regs->tf_ss = _udatasel;
422 regs->tf_ds = _udatasel;
423 regs->tf_es = _udatasel;
424 regs->tf_fs = _ufssel;
425 regs->tf_gs = _ugssel;
426 regs->tf_flags = TF_HASSEGS;
427 td->td_pcb->pcb_full_iret = 1;
428 PROC_LOCK(p);
429 mtx_lock(&psp->ps_mtx);
430}
431
432
433/*
434 * Send an interrupt to process.
435 *
436 * Stack is set up to allow sigcode stored
437 * in u. to call routine, followed by kcall
438 * to sigreturn routine below. After sigreturn
439 * resets the signal mask, the stack, and the
440 * frame pointer, it returns to the user
441 * specified pc, psl.
442 */
443static void
444linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
445{
446 struct thread *td = curthread;
447 struct proc *p = td->td_proc;
448 struct sigacts *psp;
449 struct trapframe *regs;
450 struct l_sigframe *fp, frame;
451 l_sigset_t lmask;
452 int oonstack, i;
453 int sig, code;
454
455 sig = ksi->ksi_signo;
456 code = ksi->ksi_code;
457 PROC_LOCK_ASSERT(p, MA_OWNED);
458 psp = p->p_sigacts;
459 mtx_assert(&psp->ps_mtx, MA_OWNED);
460 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
461 /* Signal handler installed with SA_SIGINFO. */
462 linux_rt_sendsig(catcher, ksi, mask);
463 return;
464 }
465
466 regs = td->td_frame;
467 oonstack = sigonstack(regs->tf_rsp);
468
469#ifdef DEBUG
470 if (ldebug(sendsig))
471 printf(ARGS(sendsig, "%p, %d, %p, %u"),
472 catcher, sig, (void*)mask, code);
473#endif
474
475 /*
476 * Allocate space for the signal handler context.
477 */
478 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
479 SIGISMEMBER(psp->ps_sigonstack, sig)) {
480 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
481 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
482 } else
483 fp = (struct l_sigframe *)regs->tf_rsp - 1;
484 mtx_unlock(&psp->ps_mtx);
485 PROC_UNLOCK(p);
486
487 /*
488 * Build the argument list for the signal handler.
489 */
490 if (p->p_sysent->sv_sigtbl)
491 if (sig <= p->p_sysent->sv_sigsize)
492 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
493
494 bzero(&frame, sizeof(frame));
495
496 frame.sf_handler = PTROUT(catcher);
497 frame.sf_sig = sig;
498
499 bsd_to_linux_sigset(mask, &lmask);
500
501 /*
502 * Build the signal context to be used by sigreturn.
503 */
504 frame.sf_sc.sc_mask = lmask.__bits[0];
505 frame.sf_sc.sc_gs = regs->tf_gs;
506 frame.sf_sc.sc_fs = regs->tf_fs;
507 frame.sf_sc.sc_es = regs->tf_es;
508 frame.sf_sc.sc_ds = regs->tf_ds;
509 frame.sf_sc.sc_edi = regs->tf_rdi;
510 frame.sf_sc.sc_esi = regs->tf_rsi;
511 frame.sf_sc.sc_ebp = regs->tf_rbp;
512 frame.sf_sc.sc_ebx = regs->tf_rbx;
513 frame.sf_sc.sc_edx = regs->tf_rdx;
514 frame.sf_sc.sc_ecx = regs->tf_rcx;
515 frame.sf_sc.sc_eax = regs->tf_rax;
516 frame.sf_sc.sc_eip = regs->tf_rip;
517 frame.sf_sc.sc_cs = regs->tf_cs;
518 frame.sf_sc.sc_eflags = regs->tf_rflags;
519 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
520 frame.sf_sc.sc_ss = regs->tf_ss;
521 frame.sf_sc.sc_err = regs->tf_err;
522 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
523 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
524
525 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
526 frame.sf_extramask[i] = lmask.__bits[i+1];
527
528 if (copyout(&frame, fp, sizeof(frame)) != 0) {
529 /*
530 * Process has trashed its stack; give it an illegal
531 * instruction to halt it in its tracks.
532 */
533 PROC_LOCK(p);
534 sigexit(td, SIGILL);
535 }
536
537 /*
538 * Build context to run handler in.
539 */
540 regs->tf_rsp = PTROUT(fp);
541 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
542 regs->tf_rflags &= ~(PSL_T | PSL_D);
543 regs->tf_cs = _ucode32sel;
544 regs->tf_ss = _udatasel;
545 regs->tf_ds = _udatasel;
546 regs->tf_es = _udatasel;
547 regs->tf_fs = _ufssel;
548 regs->tf_gs = _ugssel;
549 regs->tf_flags = TF_HASSEGS;
550 td->td_pcb->pcb_full_iret = 1;
551 PROC_LOCK(p);
552 mtx_lock(&psp->ps_mtx);
553}
554
555/*
556 * System call to cleanup state after a signal
557 * has been taken. Reset signal mask and
558 * stack state from context left by sendsig (above).
559 * Return to previous pc and psl as specified by
560 * context left by sendsig. Check carefully to
561 * make sure that the user has not modified the
562 * psl to gain improper privileges or to cause
563 * a machine fault.
564 */
565int
566linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
567{
568 struct proc *p = td->td_proc;
569 struct l_sigframe frame;
570 struct trapframe *regs;
571 l_sigset_t lmask;
572 int eflags, i;
573 ksiginfo_t ksi;
574
575 regs = td->td_frame;
576
577#ifdef DEBUG
578 if (ldebug(sigreturn))
579 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
580#endif
581 /*
582 * The trampoline code hands us the sigframe.
583 * It is unsafe to keep track of it ourselves, in the event that a
584 * program jumps out of a signal handler.
585 */
586 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
587 return (EFAULT);
588
589 /*
590 * Check for security violations.
591 */
592#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
593 eflags = frame.sf_sc.sc_eflags;
594 /*
595 * XXX do allow users to change the privileged flag PSL_RF. The
596 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
597 * sometimes set it there too. tf_eflags is kept in the signal
598 * context during signal handling and there is no other place
599 * to remember it, so the PSL_RF bit may be corrupted by the
600 * signal handler without us knowing. Corruption of the PSL_RF
601 * bit at worst causes one more or one less debugger trap, so
602 * allowing it is fairly harmless.
603 */
604 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
605 return(EINVAL);
606
607 /*
608 * Don't allow users to load a valid privileged %cs. Let the
609 * hardware check for invalid selectors, excess privilege in
610 * other selectors, invalid %eip's and invalid %esp's.
611 */
612#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
613 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
614 ksiginfo_init_trap(&ksi);
615 ksi.ksi_signo = SIGBUS;
616 ksi.ksi_code = BUS_OBJERR;
617 ksi.ksi_trapno = T_PROTFLT;
618 ksi.ksi_addr = (void *)regs->tf_rip;
619 trapsignal(td, &ksi);
620 return(EINVAL);
621 }
622
623 lmask.__bits[0] = frame.sf_sc.sc_mask;
624 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
625 lmask.__bits[i+1] = frame.sf_extramask[i];
626 PROC_LOCK(p);
627 linux_to_bsd_sigset(&lmask, &td->td_sigmask);
628 SIG_CANTMASK(td->td_sigmask);
629 signotify(td);
630 PROC_UNLOCK(p);
631
632 /*
633 * Restore signal context.
634 */
635 regs->tf_rdi = frame.sf_sc.sc_edi;
636 regs->tf_rsi = frame.sf_sc.sc_esi;
637 regs->tf_rbp = frame.sf_sc.sc_ebp;
638 regs->tf_rbx = frame.sf_sc.sc_ebx;
639 regs->tf_rdx = frame.sf_sc.sc_edx;
640 regs->tf_rcx = frame.sf_sc.sc_ecx;
641 regs->tf_rax = frame.sf_sc.sc_eax;
642 regs->tf_rip = frame.sf_sc.sc_eip;
643 regs->tf_cs = frame.sf_sc.sc_cs;
644 regs->tf_ds = frame.sf_sc.sc_ds;
645 regs->tf_es = frame.sf_sc.sc_es;
646 regs->tf_fs = frame.sf_sc.sc_fs;
647 regs->tf_gs = frame.sf_sc.sc_gs;
648 regs->tf_rflags = eflags;
649 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
650 regs->tf_ss = frame.sf_sc.sc_ss;
651 td->td_pcb->pcb_full_iret = 1;
652
653 return (EJUSTRETURN);
654}
655
656/*
657 * System call to cleanup state after a signal
658 * has been taken. Reset signal mask and
659 * stack state from context left by rt_sendsig (above).
660 * Return to previous pc and psl as specified by
661 * context left by sendsig. Check carefully to
662 * make sure that the user has not modified the
663 * psl to gain improper privileges or to cause
664 * a machine fault.
665 */
666int
667linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
668{
669 struct proc *p = td->td_proc;
670 struct l_ucontext uc;
671 struct l_sigcontext *context;
672 l_stack_t *lss;
673 stack_t ss;
674 struct trapframe *regs;
675 int eflags;
676 ksiginfo_t ksi;
677
678 regs = td->td_frame;
679
680#ifdef DEBUG
681 if (ldebug(rt_sigreturn))
682 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
683#endif
684 /*
685 * The trampoline code hands us the ucontext.
686 * It is unsafe to keep track of it ourselves, in the event that a
687 * program jumps out of a signal handler.
688 */
689 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
690 return (EFAULT);
691
692 context = &uc.uc_mcontext;
693
694 /*
695 * Check for security violations.
696 */
697#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
698 eflags = context->sc_eflags;
699 /*
700 * XXX do allow users to change the privileged flag PSL_RF. The
701 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should
702 * sometimes set it there too. tf_eflags is kept in the signal
703 * context during signal handling and there is no other place
704 * to remember it, so the PSL_RF bit may be corrupted by the
705 * signal handler without us knowing. Corruption of the PSL_RF
706 * bit at worst causes one more or one less debugger trap, so
707 * allowing it is fairly harmless.
708 */
709 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
710 return(EINVAL);
711
712 /*
713 * Don't allow users to load a valid privileged %cs. Let the
714 * hardware check for invalid selectors, excess privilege in
715 * other selectors, invalid %eip's and invalid %esp's.
716 */
717#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
718 if (!CS_SECURE(context->sc_cs)) {
719 ksiginfo_init_trap(&ksi);
720 ksi.ksi_signo = SIGBUS;
721 ksi.ksi_code = BUS_OBJERR;
722 ksi.ksi_trapno = T_PROTFLT;
723 ksi.ksi_addr = (void *)regs->tf_rip;
724 trapsignal(td, &ksi);
725 return(EINVAL);
726 }
727
728 PROC_LOCK(p);
729 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
730 SIG_CANTMASK(td->td_sigmask);
731 signotify(td);
732 PROC_UNLOCK(p);
733
734 /*
735 * Restore signal context
736 */
737 regs->tf_gs = context->sc_gs;
738 regs->tf_fs = context->sc_fs;
739 regs->tf_es = context->sc_es;
740 regs->tf_ds = context->sc_ds;
741 regs->tf_rdi = context->sc_edi;
742 regs->tf_rsi = context->sc_esi;
743 regs->tf_rbp = context->sc_ebp;
744 regs->tf_rbx = context->sc_ebx;
745 regs->tf_rdx = context->sc_edx;
746 regs->tf_rcx = context->sc_ecx;
747 regs->tf_rax = context->sc_eax;
748 regs->tf_rip = context->sc_eip;
749 regs->tf_cs = context->sc_cs;
750 regs->tf_rflags = eflags;
751 regs->tf_rsp = context->sc_esp_at_signal;
752 regs->tf_ss = context->sc_ss;
753 td->td_pcb->pcb_full_iret = 1;
754
755 /*
756 * call sigaltstack & ignore results..
757 */
758 lss = &uc.uc_stack;
759 ss.ss_sp = PTRIN(lss->ss_sp);
760 ss.ss_size = lss->ss_size;
761 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
762
763#ifdef DEBUG
764 if (ldebug(rt_sigreturn))
765 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
766 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
767#endif
768 (void)kern_sigaltstack(td, &ss, NULL);
769
770 return (EJUSTRETURN);
771}
772
773/*
774 * MPSAFE
775 */
776static void
777linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
778{
779 args[0] = tf->tf_rbx;
780 args[1] = tf->tf_rcx;
781 args[2] = tf->tf_rdx;
782 args[3] = tf->tf_rsi;
783 args[4] = tf->tf_rdi;
784 args[5] = tf->tf_rbp; /* Unconfirmed */
785 *params = NULL; /* no copyin */
786}
787
788/*
789 * If a linux binary is exec'ing something, try this image activator
790 * first. We override standard shell script execution in order to
791 * be able to modify the interpreter path. We only do this if a linux
792 * binary is doing the exec, so we do not create an EXEC module for it.
793 */
794static int exec_linux_imgact_try(struct image_params *iparams);
795
796static int
797exec_linux_imgact_try(struct image_params *imgp)
798{
799 const char *head = (const char *)imgp->image_header;
800 char *rpath;
801 int error = -1, len;
802
803 /*
804 * The interpreter for shell scripts run from a linux binary needs
805 * to be located in /compat/linux if possible in order to recursively
806 * maintain linux path emulation.
807 */
808 if (((const short *)head)[0] == SHELLMAGIC) {
809 /*
810 * Run our normal shell image activator. If it succeeds attempt
811 * to use the alternate path for the interpreter. If an
812 * alternate * path is found, use our stringspace to store it.
813 */
814 if ((error = exec_shell_imgact(imgp)) == 0) {
815 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
816 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
817 AT_FDCWD);
818 if (rpath != NULL) {
819 len = strlen(rpath) + 1;
820
821 if (len <= MAXSHELLCMDLEN) {
822 memcpy(imgp->interpreter_name, rpath,
823 len);
824 }
825 free(rpath, M_TEMP);
826 }
827 }
828 }
829 return(error);
830}
831
832/*
833 * Clear registers on exec
834 * XXX copied from ia32_signal.c.
835 */
836static void
837exec_linux_setregs(td, entry, stack, ps_strings)
838 struct thread *td;
839 u_long entry;
840 u_long stack;
841 u_long ps_strings;
842{
843 struct trapframe *regs = td->td_frame;
844 struct pcb *pcb = td->td_pcb;
845
846 mtx_lock(&dt_lock);
847 if (td->td_proc->p_md.md_ldt != NULL)
848 user_ldt_free(td);
849 else
850 mtx_unlock(&dt_lock);
851
852 critical_enter();
853 wrmsr(MSR_FSBASE, 0);
854 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
855 pcb->pcb_fsbase = 0;
856 pcb->pcb_gsbase = 0;
857 critical_exit();
858 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
859
860 bzero((char *)regs, sizeof(struct trapframe));
861 regs->tf_rip = entry;
862 regs->tf_rsp = stack;
863 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
864 regs->tf_gs = _ugssel;
865 regs->tf_fs = _ufssel;
866 regs->tf_es = _udatasel;
867 regs->tf_ds = _udatasel;
868 regs->tf_ss = _udatasel;
869 regs->tf_flags = TF_HASSEGS;
870 regs->tf_cs = _ucode32sel;
871 regs->tf_rbx = ps_strings;
872 td->td_pcb->pcb_full_iret = 1;
873 load_cr0(rcr0() | CR0_MP | CR0_TS);
874 fpstate_drop(td);
875
876 /* Return via doreti so that we can change to a different %cs */
877 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
878 pcb->pcb_flags &= ~PCB_GS32BIT;
879 td->td_retval[1] = 0;
880}
881
882/*
883 * XXX copied from ia32_sysvec.c.
884 */
885static register_t *
886linux_copyout_strings(struct image_params *imgp)
887{
888 int argc, envc;
889 u_int32_t *vectp;
890 char *stringp, *destp;
891 u_int32_t *stack_base;
892 struct linux32_ps_strings *arginfo;
893
894 /*
895 * Calculate string base and vector table pointers.
896 * Also deal with signal trampoline code for this exec type.
897 */
898 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
899 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
900 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
901 sizeof(char *));
902
903 /*
904 * install sigcode
905 */
906 copyout(imgp->proc->p_sysent->sv_sigcode,
907 ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
908
909 /*
910 * Install LINUX_PLATFORM
911 */
912 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
913 linux_szplatform), linux_szplatform);
914
915 /*
916 * If we have a valid auxargs ptr, prepare some room
917 * on the stack.
918 */
919 if (imgp->auxargs) {
920 /*
921 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
922 * lower compatibility.
923 */
924 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
925 (LINUX_AT_COUNT * 2);
926 /*
927 * The '+ 2' is for the null pointers at the end of each of
928 * the arg and env vector sets,and imgp->auxarg_size is room
929 * for argument of Runtime loader.
930 */
931 vectp = (u_int32_t *) (destp - (imgp->args->argc +
932 imgp->args->envc + 2 + imgp->auxarg_size) *
933 sizeof(u_int32_t));
934
935 } else
936 /*
937 * The '+ 2' is for the null pointers at the end of each of
938 * the arg and env vector sets
939 */
940 vectp = (u_int32_t *)(destp - (imgp->args->argc +
941 imgp->args->envc + 2) * sizeof(u_int32_t));
942
943 /*
944 * vectp also becomes our initial stack base
945 */
946 stack_base = vectp;
947
948 stringp = imgp->args->begin_argv;
949 argc = imgp->args->argc;
950 envc = imgp->args->envc;
951 /*
952 * Copy out strings - arguments and environment.
953 */
954 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
955
956 /*
957 * Fill in "ps_strings" struct for ps, w, etc.
958 */
959 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
960 suword32(&arginfo->ps_nargvstr, argc);
961
962 /*
963 * Fill in argument portion of vector table.
964 */
965 for (; argc > 0; --argc) {
966 suword32(vectp++, (uint32_t)(intptr_t)destp);
967 while (*stringp++ != 0)
968 destp++;
969 destp++;
970 }
971
972 /* a null vector table pointer separates the argp's from the envp's */
973 suword32(vectp++, 0);
974
975 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
976 suword32(&arginfo->ps_nenvstr, envc);
977
978 /*
979 * Fill in environment portion of vector table.
980 */
981 for (; envc > 0; --envc) {
982 suword32(vectp++, (uint32_t)(intptr_t)destp);
983 while (*stringp++ != 0)
984 destp++;
985 destp++;
986 }
987
988 /* end of vector table is a null pointer */
989 suword32(vectp, 0);
990
991 return ((register_t *)stack_base);
992}
993
994SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
995 "32-bit Linux emulation");
996
997static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
998SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
999 &linux32_maxdsiz, 0, "");
1000static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
1001SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
1002 &linux32_maxssiz, 0, "");
1003static u_long linux32_maxvmem = LINUX32_MAXVMEM;
1004SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
1005 &linux32_maxvmem, 0, "");
1006
1007static void
1008linux32_fixlimit(struct rlimit *rl, int which)
1009{
1010
1011 switch (which) {
1012 case RLIMIT_DATA:
1013 if (linux32_maxdsiz != 0) {
1014 if (rl->rlim_cur > linux32_maxdsiz)
1015 rl->rlim_cur = linux32_maxdsiz;
1016 if (rl->rlim_max > linux32_maxdsiz)
1017 rl->rlim_max = linux32_maxdsiz;
1018 }
1019 break;
1020 case RLIMIT_STACK:
1021 if (linux32_maxssiz != 0) {
1022 if (rl->rlim_cur > linux32_maxssiz)
1023 rl->rlim_cur = linux32_maxssiz;
1024 if (rl->rlim_max > linux32_maxssiz)
1025 rl->rlim_max = linux32_maxssiz;
1026 }
1027 break;
1028 case RLIMIT_VMEM:
1029 if (linux32_maxvmem != 0) {
1030 if (rl->rlim_cur > linux32_maxvmem)
1031 rl->rlim_cur = linux32_maxvmem;
1032 if (rl->rlim_max > linux32_maxvmem)
1033 rl->rlim_max = linux32_maxvmem;
1034 }
1035 break;
1036 }
1037}
1038
1039struct sysentvec elf_linux_sysvec = {
1040 .sv_size = LINUX_SYS_MAXSYSCALL,
1041 .sv_table = linux_sysent,
1042 .sv_mask = 0,
1043 .sv_sigsize = LINUX_SIGTBLSZ,
1044 .sv_sigtbl = bsd_to_linux_signal,
1045 .sv_errsize = ELAST + 1,
1046 .sv_errtbl = bsd_to_linux_errno,
1047 .sv_transtrap = translate_traps,
1048 .sv_fixup = elf_linux_fixup,
1049 .sv_sendsig = linux_sendsig,
1050 .sv_sigcode = linux_sigcode,
1051 .sv_szsigcode = &linux_szsigcode,
1052 .sv_prepsyscall = linux_prepsyscall,
1053 .sv_name = "Linux ELF32",
1054 .sv_coredump = elf32_coredump,
1055 .sv_imgact_try = exec_linux_imgact_try,
1056 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1057 .sv_pagesize = PAGE_SIZE,
1058 .sv_minuser = VM_MIN_ADDRESS,
1059 .sv_maxuser = LINUX32_USRSTACK,
1060 .sv_usrstack = LINUX32_USRSTACK,
1061 .sv_psstrings = LINUX32_PS_STRINGS,
1062 .sv_stackprot = VM_PROT_ALL,
1063 .sv_copyout_strings = linux_copyout_strings,
1064 .sv_setregs = exec_linux_setregs,
1065 .sv_fixlimit = linux32_fixlimit,
1066 .sv_maxssiz = &linux32_maxssiz,
1067 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32
1068};
1069
/* Descriptor word 0 value that linux32_trans_osrel() insists on. */
static int GNULINUX_ABI_DESC = 0;

/* Vendor name strings for the ELF brand note; both spell "GNU". */
static char GNU_ABI_VENDOR[] = "GNU";
static char GNULINUX_ABI_VENDOR[] = "GNU";
1070
1072
1073static boolean_t
1074linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1075{
1076 const Elf32_Word *desc;
1077 uintptr_t p;
1078
1079 p = (uintptr_t)(note + 1);
1080 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1081
1082 desc = (const Elf32_Word *)p;
1083 if (desc[0] != GNULINUX_ABI_DESC)
1084 return (FALSE);
1085
1086 /*
1087 * For linux we encode osrel as follows (see linux_mib.c):
1088 * VVVMMMIII (version, major, minor), see linux_mib.c.
1089 */
1090 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1091
1092 return (TRUE);
1093}
1094
1071static Elf_Brandnote linux32_brandnote = {
1095static Elf_Brandnote linux32_brandnote = {
1072 .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR),
1073 .hdr.n_descsz = 16,
1096 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1097 .hdr.n_descsz = 16, /* XXX at least 16 */
1074 .hdr.n_type = 1,
1098 .hdr.n_type = 1,
1075 .vendor = GNULINUX_ABI_VENDOR,
1076 .flags = 0
1099 .vendor = GNU_ABI_VENDOR,
1100 .flags = BN_TRANSLATE_OSREL,
1101 .trans_osrel = linux32_trans_osrel
1077};
1078
1079static Elf32_Brandinfo linux_brand = {
1080 .brand = ELFOSABI_LINUX,
1081 .machine = EM_386,
1082 .compat_3_brand = "Linux",
1083 .emul_path = "/compat/linux",
1084 .interp_path = "/lib/ld-linux.so.1",
1085 .sysvec = &elf_linux_sysvec,
1086 .interp_newpath = NULL,
1087 .brand_note = &linux32_brandnote,
1088 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1089};
1090
1091static Elf32_Brandinfo linux_glibc2brand = {
1092 .brand = ELFOSABI_LINUX,
1093 .machine = EM_386,
1094 .compat_3_brand = "Linux",
1095 .emul_path = "/compat/linux",
1096 .interp_path = "/lib/ld-linux.so.2",
1097 .sysvec = &elf_linux_sysvec,
1098 .interp_newpath = NULL,
1099 .brand_note = &linux32_brandnote,
1100 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1101};
1102
1103Elf32_Brandinfo *linux_brandlist[] = {
1104 &linux_brand,
1105 &linux_glibc2brand,
1106 NULL
1107};
1108
1109static int
1110linux_elf_modevent(module_t mod, int type, void *data)
1111{
1112 Elf32_Brandinfo **brandinfo;
1113 int error;
1114 struct linux_ioctl_handler **lihp;
1115 struct linux_device_handler **ldhp;
1116
1117 error = 0;
1118
1119 switch(type) {
1120 case MOD_LOAD:
1121 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122 ++brandinfo)
1123 if (elf32_insert_brand_entry(*brandinfo) < 0)
1124 error = EINVAL;
1125 if (error == 0) {
1126 SET_FOREACH(lihp, linux_ioctl_handler_set)
1127 linux_ioctl_register_handler(*lihp);
1128 SET_FOREACH(ldhp, linux_device_handler_set)
1129 linux_device_register_handler(*ldhp);
1130 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1131 sx_init(&emul_shared_lock, "emuldata->shared lock");
1132 LIST_INIT(&futex_list);
1133 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1134 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1135 linux_proc_exit, NULL, 1000);
1136 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1137 linux_schedtail, NULL, 1000);
1138 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1139 linux_proc_exec, NULL, 1000);
1140 linux_szplatform = roundup(strlen(linux_platform) + 1,
1141 sizeof(char *));
1142 linux_osd_jail_register();
1143 stclohz = (stathz ? stathz : hz);
1144 if (bootverbose)
1145 printf("Linux ELF exec handler installed\n");
1146 } else
1147 printf("cannot insert Linux ELF brand handler\n");
1148 break;
1149 case MOD_UNLOAD:
1150 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1151 ++brandinfo)
1152 if (elf32_brand_inuse(*brandinfo))
1153 error = EBUSY;
1154 if (error == 0) {
1155 for (brandinfo = &linux_brandlist[0];
1156 *brandinfo != NULL; ++brandinfo)
1157 if (elf32_remove_brand_entry(*brandinfo) < 0)
1158 error = EINVAL;
1159 }
1160 if (error == 0) {
1161 SET_FOREACH(lihp, linux_ioctl_handler_set)
1162 linux_ioctl_unregister_handler(*lihp);
1163 SET_FOREACH(ldhp, linux_device_handler_set)
1164 linux_device_unregister_handler(*ldhp);
1165 mtx_destroy(&emul_lock);
1166 sx_destroy(&emul_shared_lock);
1167 mtx_destroy(&futex_mtx);
1168 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1169 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1170 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1171 linux_osd_jail_deregister();
1172 if (bootverbose)
1173 printf("Linux ELF exec handler removed\n");
1174 } else
1175 printf("Could not deinstall ELF interpreter entry\n");
1176 break;
1177 default:
1178 return EOPNOTSUPP;
1179 }
1180 return error;
1181}
1182
1183static moduledata_t linux_elf_mod = {
1184 "linuxelf",
1185 linux_elf_modevent,
1186 0
1187};
1188
1189DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1102};
1103
1104static Elf32_Brandinfo linux_brand = {
1105 .brand = ELFOSABI_LINUX,
1106 .machine = EM_386,
1107 .compat_3_brand = "Linux",
1108 .emul_path = "/compat/linux",
1109 .interp_path = "/lib/ld-linux.so.1",
1110 .sysvec = &elf_linux_sysvec,
1111 .interp_newpath = NULL,
1112 .brand_note = &linux32_brandnote,
1113 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1114};
1115
1116static Elf32_Brandinfo linux_glibc2brand = {
1117 .brand = ELFOSABI_LINUX,
1118 .machine = EM_386,
1119 .compat_3_brand = "Linux",
1120 .emul_path = "/compat/linux",
1121 .interp_path = "/lib/ld-linux.so.2",
1122 .sysvec = &elf_linux_sysvec,
1123 .interp_newpath = NULL,
1124 .brand_note = &linux32_brandnote,
1125 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1126};
1127
1128Elf32_Brandinfo *linux_brandlist[] = {
1129 &linux_brand,
1130 &linux_glibc2brand,
1131 NULL
1132};
1133
1134static int
1135linux_elf_modevent(module_t mod, int type, void *data)
1136{
1137 Elf32_Brandinfo **brandinfo;
1138 int error;
1139 struct linux_ioctl_handler **lihp;
1140 struct linux_device_handler **ldhp;
1141
1142 error = 0;
1143
1144 switch(type) {
1145 case MOD_LOAD:
1146 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1147 ++brandinfo)
1148 if (elf32_insert_brand_entry(*brandinfo) < 0)
1149 error = EINVAL;
1150 if (error == 0) {
1151 SET_FOREACH(lihp, linux_ioctl_handler_set)
1152 linux_ioctl_register_handler(*lihp);
1153 SET_FOREACH(ldhp, linux_device_handler_set)
1154 linux_device_register_handler(*ldhp);
1155 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1156 sx_init(&emul_shared_lock, "emuldata->shared lock");
1157 LIST_INIT(&futex_list);
1158 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1159 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1160 linux_proc_exit, NULL, 1000);
1161 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1162 linux_schedtail, NULL, 1000);
1163 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1164 linux_proc_exec, NULL, 1000);
1165 linux_szplatform = roundup(strlen(linux_platform) + 1,
1166 sizeof(char *));
1167 linux_osd_jail_register();
1168 stclohz = (stathz ? stathz : hz);
1169 if (bootverbose)
1170 printf("Linux ELF exec handler installed\n");
1171 } else
1172 printf("cannot insert Linux ELF brand handler\n");
1173 break;
1174 case MOD_UNLOAD:
1175 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1176 ++brandinfo)
1177 if (elf32_brand_inuse(*brandinfo))
1178 error = EBUSY;
1179 if (error == 0) {
1180 for (brandinfo = &linux_brandlist[0];
1181 *brandinfo != NULL; ++brandinfo)
1182 if (elf32_remove_brand_entry(*brandinfo) < 0)
1183 error = EINVAL;
1184 }
1185 if (error == 0) {
1186 SET_FOREACH(lihp, linux_ioctl_handler_set)
1187 linux_ioctl_unregister_handler(*lihp);
1188 SET_FOREACH(ldhp, linux_device_handler_set)
1189 linux_device_unregister_handler(*ldhp);
1190 mtx_destroy(&emul_lock);
1191 sx_destroy(&emul_shared_lock);
1192 mtx_destroy(&futex_mtx);
1193 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1194 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1195 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1196 linux_osd_jail_deregister();
1197 if (bootverbose)
1198 printf("Linux ELF exec handler removed\n");
1199 } else
1200 printf("Could not deinstall ELF interpreter entry\n");
1201 break;
1202 default:
1203 return EOPNOTSUPP;
1204 }
1205 return error;
1206}
1207
1208static moduledata_t linux_elf_mod = {
1209 "linuxelf",
1210 linux_elf_modevent,
1211 0
1212};
1213
1214DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);