Deleted Added
full compact
linux_sysvec.c (283411) linux_sysvec.c (283421)
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1994-1996 Søren Schmidt
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 283411 2015-05-24 15:32:52Z dchagin $");
30__FBSDID("$FreeBSD: head/sys/i386/linux/linux_sysvec.c 283421 2015-05-24 15:51:18Z dchagin $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/exec.h>
35#include <sys/fcntl.h>
36#include <sys/imgact.h>
37#include <sys/imgact_aout.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/signalvar.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysctl.h>
47#include <sys/sysent.h>
48#include <sys/sysproto.h>
49#include <sys/vnode.h>
50#include <sys/eventhandler.h>
51
52#include <vm/vm.h>
53#include <vm/pmap.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_map.h>
56#include <vm/vm_object.h>
57#include <vm/vm_page.h>
58#include <vm/vm_param.h>
59
60#include <machine/cpu.h>
61#include <machine/cputypes.h>
62#include <machine/md_var.h>
63#include <machine/pcb.h>
64
65#include <i386/linux/linux.h>
66#include <i386/linux/linux_proto.h>
67#include <compat/linux/linux_emul.h>
68#include <compat/linux/linux_futex.h>
69#include <compat/linux/linux_ioctl.h>
70#include <compat/linux/linux_mib.h>
71#include <compat/linux/linux_misc.h>
72#include <compat/linux/linux_signal.h>
73#include <compat/linux/linux_util.h>
74#include <compat/linux/linux_vdso.h>
75
76MODULE_VERSION(linux, 1);
77
48#include <sys/sysent.h>
49#include <sys/sysproto.h>
50#include <sys/vnode.h>
51#include <sys/eventhandler.h>
52
53#include <vm/vm.h>
54#include <vm/pmap.h>
55#include <vm/vm_extern.h>
56#include <vm/vm_map.h>
57#include <vm/vm_object.h>
58#include <vm/vm_page.h>
59#include <vm/vm_param.h>
60
61#include <machine/cpu.h>
62#include <machine/cputypes.h>
63#include <machine/md_var.h>
64#include <machine/pcb.h>
65
66#include <i386/linux/linux.h>
67#include <i386/linux/linux_proto.h>
68#include <compat/linux/linux_emul.h>
69#include <compat/linux/linux_futex.h>
70#include <compat/linux/linux_ioctl.h>
71#include <compat/linux/linux_mib.h>
72#include <compat/linux/linux_misc.h>
73#include <compat/linux/linux_signal.h>
74#include <compat/linux/linux_util.h>
75#include <compat/linux/linux_vdso.h>
76
77MODULE_VERSION(linux, 1);
78
78MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
79
80#if BYTE_ORDER == LITTLE_ENDIAN
81#define SHELLMAGIC 0x2123 /* #! */
82#else
83#define SHELLMAGIC 0x2321
84#endif
85
79#if BYTE_ORDER == LITTLE_ENDIAN
80#define SHELLMAGIC 0x2123 /* #! */
81#else
82#define SHELLMAGIC 0x2321
83#endif
84
85#if defined(DEBUG)
86SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
87 CTLTYPE_STRING | CTLFLAG_RW,
88 0, 0, linux_sysctl_debug, "A",
89 "Linux debugging control");
90#endif
91
86/*
87 * Allow the sendsig functions to use the ldebug() facility
88 * even though they are not syscalls themselves. Map them
89 * to syscall 0. This is slightly less bogus than using
90 * ldebug(sigreturn).
91 */
92#define LINUX_SYS_linux_rt_sendsig 0
93#define LINUX_SYS_linux_sendsig 0
94
95#define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
96
97static int linux_szsigcode;
98static vm_object_t linux_shared_page_obj;
99static char *linux_shared_page_mapping;
100extern char _binary_linux_locore_o_start;
101extern char _binary_linux_locore_o_end;
102
103extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
104
105SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
92/*
93 * Allow the sendsig functions to use the ldebug() facility
94 * even though they are not syscalls themselves. Map them
95 * to syscall 0. This is slightly less bogus than using
96 * ldebug(sigreturn).
97 */
98#define LINUX_SYS_linux_rt_sendsig 0
99#define LINUX_SYS_linux_sendsig 0
100
101#define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
102
103static int linux_szsigcode;
104static vm_object_t linux_shared_page_obj;
105static char *linux_shared_page_mapping;
106extern char _binary_linux_locore_o_start;
107extern char _binary_linux_locore_o_end;
108
109extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
110
111SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
106SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
107
108static int linux_fixup(register_t **stack_base,
109 struct image_params *iparams);
110static int elf_linux_fixup(register_t **stack_base,
111 struct image_params *iparams);
112static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
113static void exec_linux_setregs(struct thread *td,
114 struct image_params *imgp, u_long stack);
115static register_t *linux_copyout_strings(struct image_params *imgp);
116static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
117static void linux_vdso_install(void *param);
118static void linux_vdso_deinstall(void *param);
119
120static int linux_szplatform;
121const char *linux_kplatform;
122
123static eventhandler_tag linux_exit_tag;
124static eventhandler_tag linux_exec_tag;
125static eventhandler_tag linux_thread_dtor_tag;
126
127/*
128 * Linux syscalls return negative errno's, we do positive and map them
129 * Reference:
130 * FreeBSD: src/sys/sys/errno.h
131 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
132 * linux-2.6.17.8/include/asm-generic/errno.h
133 */
134static int bsd_to_linux_errno[ELAST + 1] = {
135 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
136 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
137 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
138 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
139 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
140 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
141 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
142 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
143 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
144 -72, -67, -71
145};
146
147int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
148 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
149 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
150 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
151 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
152 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
153 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
154 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
155 0, LINUX_SIGUSR1, LINUX_SIGUSR2
156};
157
158int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
159 SIGHUP, SIGINT, SIGQUIT, SIGILL,
160 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
161 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
162 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
163 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
164 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
165 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
166 SIGIO, SIGURG, SIGSYS
167};
168
/*
 * Trap number translation.  _bsd_to_linux_trapcode[] is indexed by the
 * FreeBSD trap code and holds the trap number handed to Linux signal
 * handlers.  Codes without a listed counterpart, and codes past the end
 * of the table, translate to LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN 255
static int _bsd_to_linux_trapcode[] = {
	/*  0               */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT   */ 6,
	/*  2               */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT      */ 3,
	/*  4               */ LINUX_T_UNKNOWN,
	/*  5               */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP   */ 16,
	/*  7 T_ASTFLT      */ 254,
	/*  8               */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT     */ 13,
	/* 10 T_TRCTRAP     */ 1,
	/* 11               */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT     */ 14,
	/* 13               */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT    */ 17,
	/* 15               */ LINUX_T_UNKNOWN,
	/* 16               */ LINUX_T_UNKNOWN,
	/* 17               */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE      */ 0,
	/* 19 T_NMI         */ 2,
	/* 20 T_OFLOW       */ 4,
	/* 21 T_BOUND       */ 5,
	/* 22 T_DNA         */ 7,
	/* 23 T_DOUBLEFLT   */ 8,
	/* 24 T_FPOPFLT     */ 9,
	/* 25 T_TSSFLT      */ 10,
	/* 26 T_SEGNPFLT    */ 11,
	/* 27 T_STKFLT      */ 12,
	/* 28 T_MCHK        */ 18,
	/* 29 T_XMMFLT      */ 19,
	/* 30 T_RESERVED    */ 15
};

/* Bounds-checked lookup; the comparison is done in the type of sizeof. */
#define bsd_to_linux_trapcode(code) \
	(((code) < sizeof(_bsd_to_linux_trapcode) / \
	    sizeof(_bsd_to_linux_trapcode[0])) ? \
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
207
208LINUX_VDSO_SYM_INTPTR(linux_sigcode);
209LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
210LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
211
212/*
213 * If FreeBSD & Linux have a difference of opinion about what a trap
214 * means, deal with it here.
215 *
216 * MPSAFE
217 */
218static int
219translate_traps(int signal, int trap_code)
220{
221 if (signal != SIGBUS)
222 return (signal);
223 switch (trap_code) {
224 case T_PROTFLT:
225 case T_TSSFLT:
226 case T_DOUBLEFLT:
227 case T_PAGEFLT:
228 return (SIGSEGV);
229 default:
230 return (signal);
231 }
232}
233
234static int
235linux_fixup(register_t **stack_base, struct image_params *imgp)
236{
237 register_t *argv, *envp;
238
239 argv = *stack_base;
240 envp = *stack_base + (imgp->args->argc + 1);
241 (*stack_base)--;
242 suword(*stack_base, (intptr_t)(void *)envp);
243 (*stack_base)--;
244 suword(*stack_base, (intptr_t)(void *)argv);
245 (*stack_base)--;
246 suword(*stack_base, imgp->args->argc);
247 return (0);
248}
249
250static int
251elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
252{
253 struct proc *p;
254 Elf32_Auxargs *args;
255 Elf32_Addr *uplatform;
256 struct ps_strings *arginfo;
257 register_t *pos;
258
259 KASSERT(curthread->td_proc == imgp->proc,
260 ("unsafe elf_linux_fixup(), should be curproc"));
261
262 p = imgp->proc;
263 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
264 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
265 args = (Elf32_Auxargs *)imgp->auxargs;
266 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
267
268 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
269 imgp->proc->p_sysent->sv_shared_page_base);
270 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
271 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
272
273 /*
274 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
275 * as it has appeared in the 2.4.0-rc7 first time.
276 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
277 * glibc falls back to the hard-coded CLK_TCK value when aux entry
278 * is not present.
279 * Also see linux_times() implementation.
280 */
281 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
282 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
283 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
284 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
285 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
286 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
287 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
288 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
289 AUXARGS_ENTRY(pos, AT_BASE, args->base);
290 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
291 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
292 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
293 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
294 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
295 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
296 if (args->execfd != -1)
297 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
298 AUXARGS_ENTRY(pos, AT_NULL, 0);
299
300 free(imgp->auxargs, M_TEMP);
301 imgp->auxargs = NULL;
302
303 (*stack_base)--;
304 suword(*stack_base, (register_t)imgp->args->argc);
305 return (0);
306}
307
308/*
309 * Copied from kern/kern_exec.c
310 */
311static register_t *
312linux_copyout_strings(struct image_params *imgp)
313{
314 int argc, envc;
315 char **vectp;
316 char *stringp, *destp;
317 register_t *stack_base;
318 struct ps_strings *arginfo;
319 struct proc *p;
320
321 /*
322 * Calculate string base and vector table pointers.
323 */
324 p = imgp->proc;
325 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
326 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
327 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
328
329 /*
330 * install LINUX_PLATFORM
331 */
332 copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
333 linux_szplatform);
334
335 /*
336 * If we have a valid auxargs ptr, prepare some room
337 * on the stack.
338 */
339 if (imgp->auxargs) {
340 /*
341 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
342 * lower compatibility.
343 */
344 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
345 (LINUX_AT_COUNT * 2);
346 /*
347 * The '+ 2' is for the null pointers at the end of each of
348 * the arg and env vector sets,and imgp->auxarg_size is room
349 * for argument of Runtime loader.
350 */
351 vectp = (char **)(destp - (imgp->args->argc +
352 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
353 } else {
354 /*
355 * The '+ 2' is for the null pointers at the end of each of
356 * the arg and env vector sets
357 */
358 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
359 sizeof(char *));
360 }
361
362 /*
363 * vectp also becomes our initial stack base
364 */
365 stack_base = (register_t *)vectp;
366
367 stringp = imgp->args->begin_argv;
368 argc = imgp->args->argc;
369 envc = imgp->args->envc;
370
371 /*
372 * Copy out strings - arguments and environment.
373 */
374 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
375
376 /*
377 * Fill in "ps_strings" struct for ps, w, etc.
378 */
379 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
380 suword(&arginfo->ps_nargvstr, argc);
381
382 /*
383 * Fill in argument portion of vector table.
384 */
385 for (; argc > 0; --argc) {
386 suword(vectp++, (long)(intptr_t)destp);
387 while (*stringp++ != 0)
388 destp++;
389 destp++;
390 }
391
392 /* a null vector table pointer separates the argp's from the envp's */
393 suword(vectp++, 0);
394
395 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
396 suword(&arginfo->ps_nenvstr, envc);
397
398 /*
399 * Fill in environment portion of vector table.
400 */
401 for (; envc > 0; --envc) {
402 suword(vectp++, (long)(intptr_t)destp);
403 while (*stringp++ != 0)
404 destp++;
405 destp++;
406 }
407
408 /* end of vector table is a null pointer */
409 suword(vectp, 0);
410
411 return (stack_base);
412}
413
414static void
415linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
416{
417 struct thread *td = curthread;
418 struct proc *p = td->td_proc;
419 struct sigacts *psp;
420 struct trapframe *regs;
421 struct l_rt_sigframe *fp, frame;
422 int sig, code;
423 int oonstack;
424
425 sig = ksi->ksi_signo;
426 code = ksi->ksi_code;
427 PROC_LOCK_ASSERT(p, MA_OWNED);
428 psp = p->p_sigacts;
429 mtx_assert(&psp->ps_mtx, MA_OWNED);
430 regs = td->td_frame;
431 oonstack = sigonstack(regs->tf_esp);
432
433#ifdef DEBUG
434 if (ldebug(rt_sendsig))
435 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
436 catcher, sig, (void*)mask, code);
437#endif
438 /*
439 * Allocate space for the signal handler context.
440 */
441 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
442 SIGISMEMBER(psp->ps_sigonstack, sig)) {
443 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
444 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
445 } else
446 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
447 mtx_unlock(&psp->ps_mtx);
448
449 /*
450 * Build the argument list for the signal handler.
451 */
452 if (p->p_sysent->sv_sigtbl)
453 if (sig <= p->p_sysent->sv_sigsize)
454 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
455
456 bzero(&frame, sizeof(frame));
457
458 frame.sf_handler = catcher;
459 frame.sf_sig = sig;
460 frame.sf_siginfo = &fp->sf_si;
461 frame.sf_ucontext = &fp->sf_sc;
462
463 /* Fill in POSIX parts */
464 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
465
466 /*
467 * Build the signal context to be used by sigreturn.
468 */
469 frame.sf_sc.uc_flags = 0; /* XXX ??? */
470 frame.sf_sc.uc_link = NULL; /* XXX ??? */
471
472 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
473 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
474 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
475 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
476 PROC_UNLOCK(p);
477
478 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
479
480 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
481 frame.sf_sc.uc_mcontext.sc_gs = rgs();
482 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
483 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
484 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
485 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
486 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
487 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
488 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
489 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp;
490 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
491 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
492 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
493 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
494 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
495 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
496 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
497 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
498 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
499 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
500 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
501
502#ifdef DEBUG
503 if (ldebug(rt_sendsig))
504 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
505 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
506 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
507#endif
508
509 if (copyout(&frame, fp, sizeof(frame)) != 0) {
510 /*
511 * Process has trashed its stack; give it an illegal
512 * instruction to halt it in its tracks.
513 */
514#ifdef DEBUG
515 if (ldebug(rt_sendsig))
516 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
517 fp, oonstack);
518#endif
519 PROC_LOCK(p);
520 sigexit(td, SIGILL);
521 }
522
523 /*
524 * Build context to run handler in.
525 */
526 regs->tf_esp = (int)fp;
527 regs->tf_eip = linux_rt_sigcode;
528 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
529 regs->tf_cs = _ucodesel;
530 regs->tf_ds = _udatasel;
531 regs->tf_es = _udatasel;
532 regs->tf_fs = _udatasel;
533 regs->tf_ss = _udatasel;
534 PROC_LOCK(p);
535 mtx_lock(&psp->ps_mtx);
536}
537
538
539/*
540 * Send an interrupt to process.
541 *
542 * Stack is set up to allow sigcode stored
543 * in u. to call routine, followed by kcall
544 * to sigreturn routine below. After sigreturn
545 * resets the signal mask, the stack, and the
546 * frame pointer, it returns to the user
547 * specified pc, psl.
548 */
549static void
550linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
551{
552 struct thread *td = curthread;
553 struct proc *p = td->td_proc;
554 struct sigacts *psp;
555 struct trapframe *regs;
556 struct l_sigframe *fp, frame;
557 l_sigset_t lmask;
558 int sig, code;
559 int oonstack, i;
560
561 PROC_LOCK_ASSERT(p, MA_OWNED);
562 psp = p->p_sigacts;
563 sig = ksi->ksi_signo;
564 code = ksi->ksi_code;
565 mtx_assert(&psp->ps_mtx, MA_OWNED);
566 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
567 /* Signal handler installed with SA_SIGINFO. */
568 linux_rt_sendsig(catcher, ksi, mask);
569 return;
570 }
571 regs = td->td_frame;
572 oonstack = sigonstack(regs->tf_esp);
573
574#ifdef DEBUG
575 if (ldebug(sendsig))
576 printf(ARGS(sendsig, "%p, %d, %p, %u"),
577 catcher, sig, (void*)mask, code);
578#endif
579
580 /*
581 * Allocate space for the signal handler context.
582 */
583 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
584 SIGISMEMBER(psp->ps_sigonstack, sig)) {
585 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
586 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
587 } else
588 fp = (struct l_sigframe *)regs->tf_esp - 1;
589 mtx_unlock(&psp->ps_mtx);
590 PROC_UNLOCK(p);
591
592 /*
593 * Build the argument list for the signal handler.
594 */
595 if (p->p_sysent->sv_sigtbl)
596 if (sig <= p->p_sysent->sv_sigsize)
597 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
598
599 bzero(&frame, sizeof(frame));
600
601 frame.sf_handler = catcher;
602 frame.sf_sig = sig;
603
604 bsd_to_linux_sigset(mask, &lmask);
605
606 /*
607 * Build the signal context to be used by sigreturn.
608 */
609 frame.sf_sc.sc_mask = lmask.__bits[0];
610 frame.sf_sc.sc_gs = rgs();
611 frame.sf_sc.sc_fs = regs->tf_fs;
612 frame.sf_sc.sc_es = regs->tf_es;
613 frame.sf_sc.sc_ds = regs->tf_ds;
614 frame.sf_sc.sc_edi = regs->tf_edi;
615 frame.sf_sc.sc_esi = regs->tf_esi;
616 frame.sf_sc.sc_ebp = regs->tf_ebp;
617 frame.sf_sc.sc_ebx = regs->tf_ebx;
618 frame.sf_sc.sc_esp = regs->tf_esp;
619 frame.sf_sc.sc_edx = regs->tf_edx;
620 frame.sf_sc.sc_ecx = regs->tf_ecx;
621 frame.sf_sc.sc_eax = regs->tf_eax;
622 frame.sf_sc.sc_eip = regs->tf_eip;
623 frame.sf_sc.sc_cs = regs->tf_cs;
624 frame.sf_sc.sc_eflags = regs->tf_eflags;
625 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
626 frame.sf_sc.sc_ss = regs->tf_ss;
627 frame.sf_sc.sc_err = regs->tf_err;
628 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
629 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
630
631 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
632 frame.sf_extramask[i] = lmask.__bits[i+1];
633
634 if (copyout(&frame, fp, sizeof(frame)) != 0) {
635 /*
636 * Process has trashed its stack; give it an illegal
637 * instruction to halt it in its tracks.
638 */
639 PROC_LOCK(p);
640 sigexit(td, SIGILL);
641 }
642
643 /*
644 * Build context to run handler in.
645 */
646 regs->tf_esp = (int)fp;
647 regs->tf_eip = linux_sigcode;
648 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
649 regs->tf_cs = _ucodesel;
650 regs->tf_ds = _udatasel;
651 regs->tf_es = _udatasel;
652 regs->tf_fs = _udatasel;
653 regs->tf_ss = _udatasel;
654 PROC_LOCK(p);
655 mtx_lock(&psp->ps_mtx);
656}
657
658/*
659 * System call to cleanup state after a signal
660 * has been taken. Reset signal mask and
661 * stack state from context left by sendsig (above).
662 * Return to previous pc and psl as specified by
663 * context left by sendsig. Check carefully to
664 * make sure that the user has not modified the
665 * psl to gain improper privileges or to cause
666 * a machine fault.
667 */
668int
669linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
670{
671 struct l_sigframe frame;
672 struct trapframe *regs;
673 l_sigset_t lmask;
674 sigset_t bmask;
675 int eflags, i;
676 ksiginfo_t ksi;
677
678 regs = td->td_frame;
679
680#ifdef DEBUG
681 if (ldebug(sigreturn))
682 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
683#endif
684 /*
685 * The trampoline code hands us the sigframe.
686 * It is unsafe to keep track of it ourselves, in the event that a
687 * program jumps out of a signal handler.
688 */
689 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
690 return (EFAULT);
691
692 /*
693 * Check for security violations.
694 */
695#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
696 eflags = frame.sf_sc.sc_eflags;
697 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
698 return (EINVAL);
699
700 /*
701 * Don't allow users to load a valid privileged %cs. Let the
702 * hardware check for invalid selectors, excess privilege in
703 * other selectors, invalid %eip's and invalid %esp's.
704 */
705#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
706 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707 ksiginfo_init_trap(&ksi);
708 ksi.ksi_signo = SIGBUS;
709 ksi.ksi_code = BUS_OBJERR;
710 ksi.ksi_trapno = T_PROTFLT;
711 ksi.ksi_addr = (void *)regs->tf_eip;
712 trapsignal(td, &ksi);
713 return (EINVAL);
714 }
715
716 lmask.__bits[0] = frame.sf_sc.sc_mask;
717 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718 lmask.__bits[i+1] = frame.sf_extramask[i];
719 linux_to_bsd_sigset(&lmask, &bmask);
720 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
721
722 /*
723 * Restore signal context.
724 */
725 /* %gs was restored by the trampoline. */
726 regs->tf_fs = frame.sf_sc.sc_fs;
727 regs->tf_es = frame.sf_sc.sc_es;
728 regs->tf_ds = frame.sf_sc.sc_ds;
729 regs->tf_edi = frame.sf_sc.sc_edi;
730 regs->tf_esi = frame.sf_sc.sc_esi;
731 regs->tf_ebp = frame.sf_sc.sc_ebp;
732 regs->tf_ebx = frame.sf_sc.sc_ebx;
733 regs->tf_edx = frame.sf_sc.sc_edx;
734 regs->tf_ecx = frame.sf_sc.sc_ecx;
735 regs->tf_eax = frame.sf_sc.sc_eax;
736 regs->tf_eip = frame.sf_sc.sc_eip;
737 regs->tf_cs = frame.sf_sc.sc_cs;
738 regs->tf_eflags = eflags;
739 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
740 regs->tf_ss = frame.sf_sc.sc_ss;
741
742 return (EJUSTRETURN);
743}
744
745/*
746 * System call to cleanup state after a signal
747 * has been taken. Reset signal mask and
748 * stack state from context left by rt_sendsig (above).
749 * Return to previous pc and psl as specified by
750 * context left by sendsig. Check carefully to
751 * make sure that the user has not modified the
752 * psl to gain improper privileges or to cause
753 * a machine fault.
754 */
755int
756linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
757{
758 struct l_ucontext uc;
759 struct l_sigcontext *context;
760 sigset_t bmask;
761 l_stack_t *lss;
762 stack_t ss;
763 struct trapframe *regs;
764 int eflags;
765 ksiginfo_t ksi;
766
767 regs = td->td_frame;
768
769#ifdef DEBUG
770 if (ldebug(rt_sigreturn))
771 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
772#endif
773 /*
774 * The trampoline code hands us the ucontext.
775 * It is unsafe to keep track of it ourselves, in the event that a
776 * program jumps out of a signal handler.
777 */
778 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
779 return (EFAULT);
780
781 context = &uc.uc_mcontext;
782
783 /*
784 * Check for security violations.
785 */
786#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
787 eflags = context->sc_eflags;
788 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
789 return (EINVAL);
790
791 /*
792 * Don't allow users to load a valid privileged %cs. Let the
793 * hardware check for invalid selectors, excess privilege in
794 * other selectors, invalid %eip's and invalid %esp's.
795 */
796#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
797 if (!CS_SECURE(context->sc_cs)) {
798 ksiginfo_init_trap(&ksi);
799 ksi.ksi_signo = SIGBUS;
800 ksi.ksi_code = BUS_OBJERR;
801 ksi.ksi_trapno = T_PROTFLT;
802 ksi.ksi_addr = (void *)regs->tf_eip;
803 trapsignal(td, &ksi);
804 return (EINVAL);
805 }
806
807 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
808 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
809
810 /*
811 * Restore signal context
812 */
813 /* %gs was restored by the trampoline. */
814 regs->tf_fs = context->sc_fs;
815 regs->tf_es = context->sc_es;
816 regs->tf_ds = context->sc_ds;
817 regs->tf_edi = context->sc_edi;
818 regs->tf_esi = context->sc_esi;
819 regs->tf_ebp = context->sc_ebp;
820 regs->tf_ebx = context->sc_ebx;
821 regs->tf_edx = context->sc_edx;
822 regs->tf_ecx = context->sc_ecx;
823 regs->tf_eax = context->sc_eax;
824 regs->tf_eip = context->sc_eip;
825 regs->tf_cs = context->sc_cs;
826 regs->tf_eflags = eflags;
827 regs->tf_esp = context->sc_esp_at_signal;
828 regs->tf_ss = context->sc_ss;
829
830 /*
831 * call sigaltstack & ignore results..
832 */
833 lss = &uc.uc_stack;
834 ss.ss_sp = lss->ss_sp;
835 ss.ss_size = lss->ss_size;
836 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
837
838#ifdef DEBUG
839 if (ldebug(rt_sigreturn))
840 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
841 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
842#endif
843 (void)kern_sigaltstack(td, &ss, NULL);
844
845 return (EJUSTRETURN);
846}
847
848static int
849linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
850{
851 struct proc *p;
852 struct trapframe *frame;
853
854 p = td->td_proc;
855 frame = td->td_frame;
856
857 sa->code = frame->tf_eax;
858 sa->args[0] = frame->tf_ebx;
859 sa->args[1] = frame->tf_ecx;
860 sa->args[2] = frame->tf_edx;
861 sa->args[3] = frame->tf_esi;
862 sa->args[4] = frame->tf_edi;
863 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
864
865 if (sa->code >= p->p_sysent->sv_size)
866 sa->callp = &p->p_sysent->sv_table[0];
867 else
868 sa->callp = &p->p_sysent->sv_table[sa->code];
869 sa->narg = sa->callp->sy_narg;
870
871 td->td_retval[0] = 0;
872 td->td_retval[1] = frame->tf_edx;
873
874 return (0);
875}
876
877/*
878 * If a linux binary is exec'ing something, try this image activator
879 * first. We override standard shell script execution in order to
880 * be able to modify the interpreter path. We only do this if a linux
881 * binary is doing the exec, so we do not create an EXEC module for it.
882 */
883static int exec_linux_imgact_try(struct image_params *iparams);
884
885static int
886exec_linux_imgact_try(struct image_params *imgp)
887{
888 const char *head = (const char *)imgp->image_header;
889 char *rpath;
890 int error = -1;
891
892 /*
893 * The interpreter for shell scripts run from a linux binary needs
894 * to be located in /compat/linux if possible in order to recursively
895 * maintain linux path emulation.
896 */
897 if (((const short *)head)[0] == SHELLMAGIC) {
898 /*
899 * Run our normal shell image activator. If it succeeds attempt
900 * to use the alternate path for the interpreter. If an alternate
901 * path is found, use our stringspace to store it.
902 */
903 if ((error = exec_shell_imgact(imgp)) == 0) {
904 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
905 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
906 if (rpath != NULL)
907 imgp->args->fname_buf =
908 imgp->interpreter_name = rpath;
909 }
910 }
911 return (error);
912}
913
914/*
915 * exec_setregs may initialize some registers differently than Linux
916 * does, thus potentially confusing Linux binaries. If necessary, we
917 * override the exec_setregs default(s) here.
918 */
919static void
920exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
921{
922 struct pcb *pcb = td->td_pcb;
923
924 exec_setregs(td, imgp, stack);
925
926 /* Linux sets %gs to 0, we default to _udatasel */
927 pcb->pcb_gs = 0;
928 load_gs(0);
929
930 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
931}
932
933static void
934linux_get_machine(const char **dst)
935{
936
937 switch (cpu_class) {
938 case CPUCLASS_686:
939 *dst = "i686";
940 break;
941 case CPUCLASS_586:
942 *dst = "i586";
943 break;
944 case CPUCLASS_486:
945 *dst = "i486";
946 break;
947 default:
948 *dst = "i386";
949 }
950}
951
/*
 * System entry vector named "Linux a.out" (flagged SV_AOUT).  It shares
 * the syscall table, signal/errno translation tables, signal delivery
 * and register setup hooks with elf_linux_sysvec, but uses linux_fixup
 * for the stack fixup, the native PS_STRINGS and exec_copyout_strings,
 * and has no core dump handler.
 */
952struct sysentvec linux_sysvec = {
953 .sv_size = LINUX_SYS_MAXSYSCALL,
954 .sv_table = linux_sysent,
955 .sv_mask = 0,
956 .sv_sigsize = LINUX_SIGTBLSZ,
957 .sv_sigtbl = bsd_to_linux_signal,
958 .sv_errsize = ELAST + 1,
959 .sv_errtbl = bsd_to_linux_errno,
960 .sv_transtrap = translate_traps,
961 .sv_fixup = linux_fixup,
962 .sv_sendsig = linux_sendsig,
963 .sv_sigcode = &_binary_linux_locore_o_start,
964 .sv_szsigcode = &linux_szsigcode,
965 .sv_prepsyscall = NULL,
966 .sv_name = "Linux a.out",
967 .sv_coredump = NULL,
968 .sv_imgact_try = exec_linux_imgact_try,
969 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
970 .sv_pagesize = PAGE_SIZE,
971 .sv_minuser = VM_MIN_ADDRESS,
972 .sv_maxuser = VM_MAXUSER_ADDRESS,
973 .sv_usrstack = LINUX_USRSTACK,
974 .sv_psstrings = PS_STRINGS,
975 .sv_stackprot = VM_PROT_ALL,
976 .sv_copyout_strings = exec_copyout_strings,
977 .sv_setregs = exec_linux_setregs,
978 .sv_fixlimit = NULL,
979 .sv_maxssiz = NULL,
980 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
981 .sv_set_syscall_retval = cpu_set_syscall_retval,
982 .sv_fetch_syscall_args = linux_fetch_syscall_args,
983 .sv_syscallnames = NULL,
984 .sv_shared_page_base = LINUX_SHAREDPAGE,
985 .sv_shared_page_len = PAGE_SIZE,
986 .sv_schedtail = linux_schedtail,
987 .sv_thread_detach = linux_thread_detach,
988};
/* NOTE(review): presumably registers the vector at boot; INIT_SYSENTVEC is defined elsewhere. */
989INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
990
/*
 * System entry vector named "Linux ELF".  Compared with linux_sysvec it
 * uses elf_linux_fixup (which emits the ELF auxiliary vector),
 * elf32_coredump, LINUX_PS_STRINGS and linux_copyout_strings, and sets
 * SV_SHP instead of SV_AOUT.  Its sv_shared_page_obj member is filled in
 * later by linux_vdso_install().
 */
991struct sysentvec elf_linux_sysvec = {
992 .sv_size = LINUX_SYS_MAXSYSCALL,
993 .sv_table = linux_sysent,
994 .sv_mask = 0,
995 .sv_sigsize = LINUX_SIGTBLSZ,
996 .sv_sigtbl = bsd_to_linux_signal,
997 .sv_errsize = ELAST + 1,
998 .sv_errtbl = bsd_to_linux_errno,
999 .sv_transtrap = translate_traps,
1000 .sv_fixup = elf_linux_fixup,
1001 .sv_sendsig = linux_sendsig,
1002 .sv_sigcode = &_binary_linux_locore_o_start,
1003 .sv_szsigcode = &linux_szsigcode,
1004 .sv_prepsyscall = NULL,
1005 .sv_name = "Linux ELF",
1006 .sv_coredump = elf32_coredump,
1007 .sv_imgact_try = exec_linux_imgact_try,
1008 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1009 .sv_pagesize = PAGE_SIZE,
1010 .sv_minuser = VM_MIN_ADDRESS,
1011 .sv_maxuser = VM_MAXUSER_ADDRESS,
1012 .sv_usrstack = LINUX_USRSTACK,
1013 .sv_psstrings = LINUX_PS_STRINGS,
1014 .sv_stackprot = VM_PROT_ALL,
1015 .sv_copyout_strings = linux_copyout_strings,
1016 .sv_setregs = exec_linux_setregs,
1017 .sv_fixlimit = NULL,
1018 .sv_maxssiz = NULL,
1019 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1020 .sv_set_syscall_retval = cpu_set_syscall_retval,
1021 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1022 .sv_syscallnames = NULL,
1023 .sv_shared_page_base = LINUX_SHAREDPAGE,
1024 .sv_shared_page_len = PAGE_SIZE,
1025 .sv_schedtail = linux_schedtail,
1026 .sv_thread_detach = linux_thread_detach,
1027};
1028
/*
 * SYSINIT hook that installs the Linux signal trampoline image into the
 * shared page used by elf_linux_sysvec.
 *
 * The image size is the distance between the _binary_linux_locore_o_start
 * and _binary_linux_locore_o_end symbols; the kernel panics if it exceeds
 * sv_shared_page_len.  The function then calls the vdso fixup, obtains the
 * shared page object and mapping, relocates the image for
 * LINUX_SHAREDPAGE, copies it into the mapping and records the page
 * object in elf_linux_sysvec.  The param argument is not used.
 */
1029static void
1030linux_vdso_install(void *param)
1031{
1032
1033 linux_szsigcode = (&_binary_linux_locore_o_end -
1034 &_binary_linux_locore_o_start);
1035
1036 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1037 panic("Linux invalid vdso size\n");
1038
1039 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1040
1041 linux_shared_page_obj = __elfN(linux_shared_page_init)
1042 (&linux_shared_page_mapping);
1043
1044 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
1045
/* The copy happens after relocation, so the mapping receives the relocated image. */
1046 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1047 linux_szsigcode);
1048 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1049}
1050SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1051 (sysinit_cfunc_t)linux_vdso_install, NULL);
1052
1053static void
1054linux_vdso_deinstall(void *param)
1055{
1056
1057 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1058};
1059SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1060 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1061
1062static char GNU_ABI_VENDOR[] = "GNU";
1063static int GNULINUX_ABI_DESC = 0;
1064
1065static boolean_t
1066linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1067{
1068 const Elf32_Word *desc;
1069 uintptr_t p;
1070
1071 p = (uintptr_t)(note + 1);
1072 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1073
1074 desc = (const Elf32_Word *)p;
1075 if (desc[0] != GNULINUX_ABI_DESC)
1076 return (FALSE);
1077
1078 /*
1079 * For linux we encode osrel as follows (see linux_mib.c):
1080 * VVVMMMIII (version, major, minor), see linux_mib.c.
1081 */
1082 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1083
1084 return (TRUE);
1085}
1086
/*
 * Brand note description shared by both Linux brands below: vendor name
 * "GNU" (GNU_ABI_VENDOR), note type 1, and linux_trans_osrel() as the
 * callback that turns the note descriptor into an osrel value.
 */
1087static Elf_Brandnote linux_brandnote = {
1088 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1089 .hdr.n_descsz = 16, /* XXX at least 16 */
1090 .hdr.n_type = 1,
1091 .vendor = GNU_ABI_VENDOR,
1092 .flags = BN_TRANSLATE_OSREL,
1093 .trans_osrel = linux_trans_osrel
1094};
1095
/* Brand entry whose interpreter path is /lib/ld-linux.so.1. */
1096static Elf32_Brandinfo linux_brand = {
1097 .brand = ELFOSABI_LINUX,
1098 .machine = EM_386,
1099 .compat_3_brand = "Linux",
1100 .emul_path = "/compat/linux",
1101 .interp_path = "/lib/ld-linux.so.1",
1102 .sysvec = &elf_linux_sysvec,
1103 .interp_newpath = NULL,
1104 .brand_note = &linux_brandnote,
1105 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1106};
1107
/* Same as linux_brand except for the interpreter path, /lib/ld-linux.so.2. */
1108static Elf32_Brandinfo linux_glibc2brand = {
1109 .brand = ELFOSABI_LINUX,
1110 .machine = EM_386,
1111 .compat_3_brand = "Linux",
1112 .emul_path = "/compat/linux",
1113 .interp_path = "/lib/ld-linux.so.2",
1114 .sysvec = &elf_linux_sysvec,
1115 .interp_newpath = NULL,
1116 .brand_note = &linux_brandnote,
1117 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1118};
1119
/* NULL-terminated list of all Linux ELF brands defined above. */
1120Elf32_Brandinfo *linux_brandlist[] = {
1121 &linux_brand,
1122 &linux_glibc2brand,
1123 NULL
1124};
1125
1126static int
1127linux_elf_modevent(module_t mod, int type, void *data)
1128{
1129 Elf32_Brandinfo **brandinfo;
1130 int error;
1131 struct linux_ioctl_handler **lihp;
112
113static int linux_fixup(register_t **stack_base,
114 struct image_params *iparams);
115static int elf_linux_fixup(register_t **stack_base,
116 struct image_params *iparams);
117static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
118static void exec_linux_setregs(struct thread *td,
119 struct image_params *imgp, u_long stack);
120static register_t *linux_copyout_strings(struct image_params *imgp);
121static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
122static void linux_vdso_install(void *param);
123static void linux_vdso_deinstall(void *param);
124
125static int linux_szplatform;
126const char *linux_kplatform;
127
128static eventhandler_tag linux_exit_tag;
129static eventhandler_tag linux_exec_tag;
130static eventhandler_tag linux_thread_dtor_tag;
131
132/*
133 * Linux syscalls return negative errno's, we do positive and map them
134 * Reference:
135 * FreeBSD: src/sys/sys/errno.h
136 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
137 * linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table has ELAST + 1 entries, ten per row below, and holds already
 * negated values.  It is installed as sv_errtbl in both Linux sysentvecs.
 * NOTE(review): presumably indexed by the FreeBSD errno value -- confirm
 * against the consumer of sv_errtbl.
138 */
139static int bsd_to_linux_errno[ELAST + 1] = {
140 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
141 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
142 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
143 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
144 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
145 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
146 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
147 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
148 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
149 -72, -67, -71
150};
151
/*
 * FreeBSD to Linux signal number translation, installed as sv_sigtbl in
 * both Linux sysentvecs.  The signal delivery code looks a signal up as
 * sv_sigtbl[_SIG_IDX(sig)].  A 0 entry marks a slot with no Linux signal
 * listed here.
 * NOTE(review): _SIG_IDX is defined elsewhere; presumably sig - 1.
 */
152int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
153 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
154 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
155 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
156 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
157 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
158 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
159 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
160 0, LINUX_SIGUSR1, LINUX_SIGUSR2
161};
162
/*
 * Linux to FreeBSD signal number translation; the reverse direction of
 * bsd_to_linux_signal.  SIGBUS and SIGURG each appear twice, so the
 * mapping is not one-to-one.
 * NOTE(review): presumably indexed by Linux signal number minus one --
 * confirm against the users of this table.
 */
163int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
164 SIGHUP, SIGINT, SIGQUIT, SIGILL,
165 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
166 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
167 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
168 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
169 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
170 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
171 SIGIO, SIGURG, SIGSYS
172};
173
/*
 * Translation of FreeBSD trap numbers (the array index) into the trap
 * numbers stored in a Linux signal context.  Slots without a Linux
 * counterpart hold LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN 255
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Look up a trap number; anything that does not index into the table
 * yields LINUX_T_UNKNOWN.  The argument is evaluated more than once.
 */
#define bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
212
213LINUX_VDSO_SYM_INTPTR(linux_sigcode);
214LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
215LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
216
217/*
218 * If FreeBSD & Linux have a difference of opinion about what a trap
219 * means, deal with it here.
220 *
221 * MPSAFE
222 */
223static int
224translate_traps(int signal, int trap_code)
225{
226 if (signal != SIGBUS)
227 return (signal);
228 switch (trap_code) {
229 case T_PROTFLT:
230 case T_TSSFLT:
231 case T_DOUBLEFLT:
232 case T_PAGEFLT:
233 return (SIGSEGV);
234 default:
235 return (signal);
236 }
237}
238
239static int
240linux_fixup(register_t **stack_base, struct image_params *imgp)
241{
242 register_t *argv, *envp;
243
244 argv = *stack_base;
245 envp = *stack_base + (imgp->args->argc + 1);
246 (*stack_base)--;
247 suword(*stack_base, (intptr_t)(void *)envp);
248 (*stack_base)--;
249 suword(*stack_base, (intptr_t)(void *)argv);
250 (*stack_base)--;
251 suword(*stack_base, imgp->args->argc);
252 return (0);
253}
254
/*
 * Stack fixup for the ELF sysentvec: emit the ELF auxiliary vector, free
 * imgp->auxargs and push the argument count below *stack_base.
 *
 * The auxiliary entries start argc + envc + 2 words above *stack_base,
 * i.e. behind the argument and environment vectors and their two NULL
 * terminators.  The platform string address is computed as
 * linux_szplatform bytes below the ps_strings address, which is where
 * linux_copyout_strings() stores it.  Always returns 0.
 * NOTE(review): AUXARGS_ENTRY is defined elsewhere; presumably it stores
 * a (type, value) pair at pos and advances pos -- confirm.
 */
255static int
256elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
257{
258 struct proc *p;
259 Elf32_Auxargs *args;
260 Elf32_Addr *uplatform;
261 struct ps_strings *arginfo;
262 register_t *pos;
263
264 KASSERT(curthread->td_proc == imgp->proc,
265 ("unsafe elf_linux_fixup(), should be curproc"));
266
267 p = imgp->proc;
268 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
269 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
270 args = (Elf32_Auxargs *)imgp->auxargs;
271 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
272
273 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
274 imgp->proc->p_sysent->sv_shared_page_base);
275 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
276 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
277
278 /*
279 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
280 * as it has appeared in the 2.4.0-rc7 first time.
281 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
282 * glibc falls back to the hard-coded CLK_TCK value when aux entry
283 * is not present.
284 * Also see linux_times() implementation.
285 */
286 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
287 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
288 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
289 AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
290 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
291 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
292 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
293 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
294 AUXARGS_ENTRY(pos, AT_BASE, args->base);
295 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
/* Note: the effective ids below are taken from the saved ids (cr_svuid / cr_svgid). */
296 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
297 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
298 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
299 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
300 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
301 if (args->execfd != -1)
302 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
303 AUXARGS_ENTRY(pos, AT_NULL, 0);
304
/* The auxargs are consumed here; clear the pointer so they are not reused. */
305 free(imgp->auxargs, M_TEMP);
306 imgp->auxargs = NULL;
307
308 (*stack_base)--;
309 suword(*stack_base, (register_t)imgp->args->argc);
310 return (0);
311}
312
313/*
314 * Copied from kern/kern_exec.c
 *
 * Copy the platform string and the argument/environment strings to the
 * top of the new user stack, build the argv and envp pointer vectors
 * below them, and record their location and counts in the ps_strings
 * structure.  When imgp->auxargs is set, room for the auxiliary vector
 * is reserved after the vectors.  Returns the address of the vector
 * table, which is also the initial stack base.
 *
 * NOTE(review): the results of copyout() and suword() are not checked.
315 */
316static register_t *
317linux_copyout_strings(struct image_params *imgp)
318{
319 int argc, envc;
320 char **vectp;
321 char *stringp, *destp;
322 register_t *stack_base;
323 struct ps_strings *arginfo;
324 struct proc *p;
325
326 /*
327 * Calculate string base and vector table pointers.
328 */
329 p = imgp->proc;
330 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
331 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
332 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
333
334 /*
335 * install LINUX_PLATFORM
336 */
337 copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
338 linux_szplatform);
339
340 /*
341 * If we have a valid auxargs ptr, prepare some room
342 * on the stack.
343 */
344 if (imgp->auxargs) {
345 /*
346 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
347 * lower compatibility.
348 */
349 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
350 (LINUX_AT_COUNT * 2);
351 /*
352 * The '+ 2' is for the null pointers at the end of each of
353 * the arg and env vector sets,and imgp->auxarg_size is room
354 * for argument of Runtime loader.
355 */
356 vectp = (char **)(destp - (imgp->args->argc +
357 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
358 } else {
359 /*
360 * The '+ 2' is for the null pointers at the end of each of
361 * the arg and env vector sets
362 */
363 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
364 sizeof(char *));
365 }
366
367 /*
368 * vectp also becomes our initial stack base
369 */
370 stack_base = (register_t *)vectp;
371
372 stringp = imgp->args->begin_argv;
373 argc = imgp->args->argc;
374 envc = imgp->args->envc;
375
376 /*
377 * Copy out strings - arguments and environment.
378 */
379 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
380
381 /*
382 * Fill in "ps_strings" struct for ps, w, etc.
383 */
384 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
385 suword(&arginfo->ps_nargvstr, argc);
386
387 /*
388 * Fill in argument portion of vector table.
 *
 * stringp walks the kernel copy of the strings while destp tracks
 * the matching user address; both step past each terminating NUL.
389 */
390 for (; argc > 0; --argc) {
391 suword(vectp++, (long)(intptr_t)destp);
392 while (*stringp++ != 0)
393 destp++;
394 destp++;
395 }
396
397 /* a null vector table pointer separates the argp's from the envp's */
398 suword(vectp++, 0);
399
400 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
401 suword(&arginfo->ps_nenvstr, envc);
402
403 /*
404 * Fill in environment portion of vector table.
405 */
406 for (; envc > 0; --envc) {
407 suword(vectp++, (long)(intptr_t)destp);
408 while (*stringp++ != 0)
409 destp++;
410 destp++;
411 }
412
413 /* end of vector table is a null pointer */
414 suword(vectp, 0);
415
416 return (stack_base);
417}
418
419static void
420linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
421{
422 struct thread *td = curthread;
423 struct proc *p = td->td_proc;
424 struct sigacts *psp;
425 struct trapframe *regs;
426 struct l_rt_sigframe *fp, frame;
427 int sig, code;
428 int oonstack;
429
430 sig = ksi->ksi_signo;
431 code = ksi->ksi_code;
432 PROC_LOCK_ASSERT(p, MA_OWNED);
433 psp = p->p_sigacts;
434 mtx_assert(&psp->ps_mtx, MA_OWNED);
435 regs = td->td_frame;
436 oonstack = sigonstack(regs->tf_esp);
437
438#ifdef DEBUG
439 if (ldebug(rt_sendsig))
440 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
441 catcher, sig, (void*)mask, code);
442#endif
443 /*
444 * Allocate space for the signal handler context.
445 */
446 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
447 SIGISMEMBER(psp->ps_sigonstack, sig)) {
448 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
449 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
450 } else
451 fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
452 mtx_unlock(&psp->ps_mtx);
453
454 /*
455 * Build the argument list for the signal handler.
456 */
457 if (p->p_sysent->sv_sigtbl)
458 if (sig <= p->p_sysent->sv_sigsize)
459 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
460
461 bzero(&frame, sizeof(frame));
462
463 frame.sf_handler = catcher;
464 frame.sf_sig = sig;
465 frame.sf_siginfo = &fp->sf_si;
466 frame.sf_ucontext = &fp->sf_sc;
467
468 /* Fill in POSIX parts */
469 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
470
471 /*
472 * Build the signal context to be used by sigreturn.
473 */
474 frame.sf_sc.uc_flags = 0; /* XXX ??? */
475 frame.sf_sc.uc_link = NULL; /* XXX ??? */
476
477 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
478 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
479 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
480 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
481 PROC_UNLOCK(p);
482
483 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
484
485 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
486 frame.sf_sc.uc_mcontext.sc_gs = rgs();
487 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
488 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
489 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
490 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi;
491 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi;
492 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp;
493 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx;
494 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp;
495 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx;
496 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx;
497 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax;
498 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip;
499 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
500 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
501 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
502 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
503 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
504 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
505 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
506
507#ifdef DEBUG
508 if (ldebug(rt_sendsig))
509 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
510 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
511 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
512#endif
513
514 if (copyout(&frame, fp, sizeof(frame)) != 0) {
515 /*
516 * Process has trashed its stack; give it an illegal
517 * instruction to halt it in its tracks.
518 */
519#ifdef DEBUG
520 if (ldebug(rt_sendsig))
521 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
522 fp, oonstack);
523#endif
524 PROC_LOCK(p);
525 sigexit(td, SIGILL);
526 }
527
528 /*
529 * Build context to run handler in.
530 */
531 regs->tf_esp = (int)fp;
532 regs->tf_eip = linux_rt_sigcode;
533 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
534 regs->tf_cs = _ucodesel;
535 regs->tf_ds = _udatasel;
536 regs->tf_es = _udatasel;
537 regs->tf_fs = _udatasel;
538 regs->tf_ss = _udatasel;
539 PROC_LOCK(p);
540 mtx_lock(&psp->ps_mtx);
541}
542
543
544/*
545 * Send an interrupt to process.
546 *
547 * Stack is set up to allow sigcode stored
548 * in u. to call routine, followed by kcall
549 * to sigreturn routine below. After sigreturn
550 * resets the signal mask, the stack, and the
551 * frame pointer, it returns to the user
552 * specified pc, psl.
 *
 * Signals whose handler was installed with SA_SIGINFO are handed to
 * linux_rt_sendsig().  For the rest an l_sigframe is built and copied
 * out to the alternate signal stack or just below the user stack
 * pointer, and the trap frame is pointed at the signal trampoline.
 *
 * Called with the process lock and the sigacts mutex held; both are
 * dropped while the frame is built and copied out, and are held again
 * on return.
553 */
554static void
555linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
556{
557 struct thread *td = curthread;
558 struct proc *p = td->td_proc;
559 struct sigacts *psp;
560 struct trapframe *regs;
561 struct l_sigframe *fp, frame;
562 l_sigset_t lmask;
563 int sig, code;
564 int oonstack, i;
565
566 PROC_LOCK_ASSERT(p, MA_OWNED);
567 psp = p->p_sigacts;
568 sig = ksi->ksi_signo;
569 code = ksi->ksi_code;
570 mtx_assert(&psp->ps_mtx, MA_OWNED);
571 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
572 /* Signal handler installed with SA_SIGINFO. */
573 linux_rt_sendsig(catcher, ksi, mask);
574 return;
575 }
576 regs = td->td_frame;
577 oonstack = sigonstack(regs->tf_esp);
578
579#ifdef DEBUG
580 if (ldebug(sendsig))
581 printf(ARGS(sendsig, "%p, %d, %p, %u"),
582 catcher, sig, (void*)mask, code);
583#endif
584
585 /*
586 * Allocate space for the signal handler context.
587 */
588 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
589 SIGISMEMBER(psp->ps_sigonstack, sig)) {
590 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
591 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
592 } else
593 fp = (struct l_sigframe *)regs->tf_esp - 1;
594 mtx_unlock(&psp->ps_mtx);
595 PROC_UNLOCK(p);
596
597 /*
598 * Build the argument list for the signal handler.
 *
 * The signal number is translated through the sysentvec's signal
 * table when it is within the table's range.
599 */
600 if (p->p_sysent->sv_sigtbl)
601 if (sig <= p->p_sysent->sv_sigsize)
602 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
603
604 bzero(&frame, sizeof(frame));
605
606 frame.sf_handler = catcher;
607 frame.sf_sig = sig;
608
609 bsd_to_linux_sigset(mask, &lmask);
610
611 /*
612 * Build the signal context to be used by sigreturn.
613 */
614 frame.sf_sc.sc_mask = lmask.__bits[0];
615 frame.sf_sc.sc_gs = rgs();
616 frame.sf_sc.sc_fs = regs->tf_fs;
617 frame.sf_sc.sc_es = regs->tf_es;
618 frame.sf_sc.sc_ds = regs->tf_ds;
619 frame.sf_sc.sc_edi = regs->tf_edi;
620 frame.sf_sc.sc_esi = regs->tf_esi;
621 frame.sf_sc.sc_ebp = regs->tf_ebp;
622 frame.sf_sc.sc_ebx = regs->tf_ebx;
623 frame.sf_sc.sc_esp = regs->tf_esp;
624 frame.sf_sc.sc_edx = regs->tf_edx;
625 frame.sf_sc.sc_ecx = regs->tf_ecx;
626 frame.sf_sc.sc_eax = regs->tf_eax;
627 frame.sf_sc.sc_eip = regs->tf_eip;
628 frame.sf_sc.sc_cs = regs->tf_cs;
629 frame.sf_sc.sc_eflags = regs->tf_eflags;
630 frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
631 frame.sf_sc.sc_ss = regs->tf_ss;
632 frame.sf_sc.sc_err = regs->tf_err;
633 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
634 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
635
/* Mask words beyond the first do not fit in sc_mask and go into sf_extramask. */
636 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
637 frame.sf_extramask[i] = lmask.__bits[i+1];
638
639 if (copyout(&frame, fp, sizeof(frame)) != 0) {
640 /*
641 * Process has trashed its stack; give it an illegal
642 * instruction to halt it in its tracks.
643 */
644 PROC_LOCK(p);
645 sigexit(td, SIGILL);
646 }
647
648 /*
649 * Build context to run handler in.
650 */
651 regs->tf_esp = (int)fp;
652 regs->tf_eip = linux_sigcode;
653 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
654 regs->tf_cs = _ucodesel;
655 regs->tf_ds = _udatasel;
656 regs->tf_es = _udatasel;
657 regs->tf_fs = _udatasel;
658 regs->tf_ss = _udatasel;
/* Re-take the locks the caller held on entry. */
659 PROC_LOCK(p);
660 mtx_lock(&psp->ps_mtx);
661}
662
663/*
664 * System call to cleanup state after a signal
665 * has been taken. Reset signal mask and
666 * stack state from context left by sendsig (above).
667 * Return to previous pc and psl as specified by
668 * context left by sendsig. Check carefully to
669 * make sure that the user has not modified the
670 * psl to gain improper privileges or to cause
671 * a machine fault.
 *
 * Returns EFAULT when the signal frame at args->sfp cannot be copied
 * in, EINVAL when the saved eflags or %cs fail the security checks
 * (a bad %cs additionally raises SIGBUS), and EJUSTRETURN once the
 * trap frame has been restored from the signal frame.
672 */
673int
674linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
675{
676 struct l_sigframe frame;
677 struct trapframe *regs;
678 l_sigset_t lmask;
679 sigset_t bmask;
680 int eflags, i;
681 ksiginfo_t ksi;
682
683 regs = td->td_frame;
684
685#ifdef DEBUG
686 if (ldebug(sigreturn))
687 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
688#endif
689 /*
690 * The trampoline code hands us the sigframe.
691 * It is unsafe to keep track of it ourselves, in the event that a
692 * program jumps out of a signal handler.
693 */
694 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
695 return (EFAULT);
696
697 /*
698 * Check for security violations.
 *
 * Only the eflags bits in PSL_USERCHANGE may differ from the
 * current trap frame's eflags.
699 */
700#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
701 eflags = frame.sf_sc.sc_eflags;
702 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
703 return (EINVAL);
704
705 /*
706 * Don't allow users to load a valid privileged %cs. Let the
707 * hardware check for invalid selectors, excess privilege in
708 * other selectors, invalid %eip's and invalid %esp's.
709 */
710#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
711 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
712 ksiginfo_init_trap(&ksi);
713 ksi.ksi_signo = SIGBUS;
714 ksi.ksi_code = BUS_OBJERR;
715 ksi.ksi_trapno = T_PROTFLT;
716 ksi.ksi_addr = (void *)regs->tf_eip;
717 trapsignal(td, &ksi);
718 return (EINVAL);
719 }
720
/* Reassemble the Linux mask from sc_mask plus sf_extramask and install it. */
721 lmask.__bits[0] = frame.sf_sc.sc_mask;
722 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
723 lmask.__bits[i+1] = frame.sf_extramask[i];
724 linux_to_bsd_sigset(&lmask, &bmask);
725 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
726
727 /*
728 * Restore signal context.
729 */
730 /* %gs was restored by the trampoline. */
731 regs->tf_fs = frame.sf_sc.sc_fs;
732 regs->tf_es = frame.sf_sc.sc_es;
733 regs->tf_ds = frame.sf_sc.sc_ds;
734 regs->tf_edi = frame.sf_sc.sc_edi;
735 regs->tf_esi = frame.sf_sc.sc_esi;
736 regs->tf_ebp = frame.sf_sc.sc_ebp;
737 regs->tf_ebx = frame.sf_sc.sc_ebx;
738 regs->tf_edx = frame.sf_sc.sc_edx;
739 regs->tf_ecx = frame.sf_sc.sc_ecx;
740 regs->tf_eax = frame.sf_sc.sc_eax;
741 regs->tf_eip = frame.sf_sc.sc_eip;
742 regs->tf_cs = frame.sf_sc.sc_cs;
743 regs->tf_eflags = eflags;
744 regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
745 regs->tf_ss = frame.sf_sc.sc_ss;
746
747 return (EJUSTRETURN);
748}
749
750/*
751 * System call to cleanup state after a signal
752 * has been taken. Reset signal mask and
753 * stack state from context left by rt_sendsig (above).
754 * Return to previous pc and psl as specified by
755 * context left by sendsig. Check carefully to
756 * make sure that the user has not modified the
757 * psl to gain improper privileges or to cause
758 * a machine fault.
 *
 * Returns EFAULT when the ucontext at args->ucp cannot be copied in,
 * EINVAL when the saved eflags or %cs fail the security checks (a bad
 * %cs additionally raises SIGBUS), and EJUSTRETURN once the trap frame
 * and the alternate signal stack settings have been restored.
759 */
760int
761linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
762{
763 struct l_ucontext uc;
764 struct l_sigcontext *context;
765 sigset_t bmask;
766 l_stack_t *lss;
767 stack_t ss;
768 struct trapframe *regs;
769 int eflags;
770 ksiginfo_t ksi;
771
772 regs = td->td_frame;
773
774#ifdef DEBUG
775 if (ldebug(rt_sigreturn))
776 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
777#endif
778 /*
779 * The trampoline code hands us the ucontext.
780 * It is unsafe to keep track of it ourselves, in the event that a
781 * program jumps out of a signal handler.
782 */
783 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
784 return (EFAULT);
785
786 context = &uc.uc_mcontext;
787
788 /*
789 * Check for security violations.
 *
 * Only the eflags bits in PSL_USERCHANGE may differ from the
 * current trap frame's eflags.
790 */
791#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
792 eflags = context->sc_eflags;
793 if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
794 return (EINVAL);
795
796 /*
797 * Don't allow users to load a valid privileged %cs. Let the
798 * hardware check for invalid selectors, excess privilege in
799 * other selectors, invalid %eip's and invalid %esp's.
800 */
801#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
802 if (!CS_SECURE(context->sc_cs)) {
803 ksiginfo_init_trap(&ksi);
804 ksi.ksi_signo = SIGBUS;
805 ksi.ksi_code = BUS_OBJERR;
806 ksi.ksi_trapno = T_PROTFLT;
807 ksi.ksi_addr = (void *)regs->tf_eip;
808 trapsignal(td, &ksi);
809 return (EINVAL);
810 }
811
/* Install the signal mask saved in the ucontext. */
812 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
813 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
814
815 /*
816 * Restore signal context
817 */
818 /* %gs was restored by the trampoline. */
819 regs->tf_fs = context->sc_fs;
820 regs->tf_es = context->sc_es;
821 regs->tf_ds = context->sc_ds;
822 regs->tf_edi = context->sc_edi;
823 regs->tf_esi = context->sc_esi;
824 regs->tf_ebp = context->sc_ebp;
825 regs->tf_ebx = context->sc_ebx;
826 regs->tf_edx = context->sc_edx;
827 regs->tf_ecx = context->sc_ecx;
828 regs->tf_eax = context->sc_eax;
829 regs->tf_eip = context->sc_eip;
830 regs->tf_cs = context->sc_cs;
831 regs->tf_eflags = eflags;
832 regs->tf_esp = context->sc_esp_at_signal;
833 regs->tf_ss = context->sc_ss;
834
835 /*
836 * call sigaltstack & ignore results..
 *
 * The stack description comes from the ucontext's uc_stack, with
 * its flags converted from the Linux to the native encoding.
837 */
838 lss = &uc.uc_stack;
839 ss.ss_sp = lss->ss_sp;
840 ss.ss_size = lss->ss_size;
841 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
842
843#ifdef DEBUG
844 if (ldebug(rt_sigreturn))
845 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
846 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
847#endif
848 (void)kern_sigaltstack(td, &ss, NULL);
849
850 return (EJUSTRETURN);
851}
852
853static int
854linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
855{
856 struct proc *p;
857 struct trapframe *frame;
858
859 p = td->td_proc;
860 frame = td->td_frame;
861
862 sa->code = frame->tf_eax;
863 sa->args[0] = frame->tf_ebx;
864 sa->args[1] = frame->tf_ecx;
865 sa->args[2] = frame->tf_edx;
866 sa->args[3] = frame->tf_esi;
867 sa->args[4] = frame->tf_edi;
868 sa->args[5] = frame->tf_ebp; /* Unconfirmed */
869
870 if (sa->code >= p->p_sysent->sv_size)
871 sa->callp = &p->p_sysent->sv_table[0];
872 else
873 sa->callp = &p->p_sysent->sv_table[sa->code];
874 sa->narg = sa->callp->sy_narg;
875
876 td->td_retval[0] = 0;
877 td->td_retval[1] = frame->tf_edx;
878
879 return (0);
880}
881
882/*
883 * If a linux binary is exec'ing something, try this image activator
884 * first. We override standard shell script execution in order to
885 * be able to modify the interpreter path. We only do this if a linux
886 * binary is doing the exec, so we do not create an EXEC module for it.
887 */
888static int exec_linux_imgact_try(struct image_params *iparams);
889
890static int
891exec_linux_imgact_try(struct image_params *imgp)
892{
893 const char *head = (const char *)imgp->image_header;
894 char *rpath;
895 int error = -1;
896
897 /*
898 * The interpreter for shell scripts run from a linux binary needs
899 * to be located in /compat/linux if possible in order to recursively
900 * maintain linux path emulation.
901 */
902 if (((const short *)head)[0] == SHELLMAGIC) {
903 /*
904 * Run our normal shell image activator. If it succeeds attempt
905 * to use the alternate path for the interpreter. If an alternate
906 * path is found, use our stringspace to store it.
907 */
908 if ((error = exec_shell_imgact(imgp)) == 0) {
909 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
910 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
911 if (rpath != NULL)
912 imgp->args->fname_buf =
913 imgp->interpreter_name = rpath;
914 }
915 }
916 return (error);
917}
918
919/*
920 * exec_setregs may initialize some registers differently than Linux
921 * does, thus potentially confusing Linux binaries. If necessary, we
922 * override the exec_setregs default(s) here.
923 */
924static void
925exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
926{
927 struct pcb *pcb = td->td_pcb;
928
929 exec_setregs(td, imgp, stack);
930
931 /* Linux sets %gs to 0, we default to _udatasel */
932 pcb->pcb_gs = 0;
933 load_gs(0);
934
935 pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
936}
937
938static void
939linux_get_machine(const char **dst)
940{
941
942 switch (cpu_class) {
943 case CPUCLASS_686:
944 *dst = "i686";
945 break;
946 case CPUCLASS_586:
947 *dst = "i586";
948 break;
949 case CPUCLASS_486:
950 *dst = "i486";
951 break;
952 default:
953 *dst = "i386";
954 }
955}
956
957struct sysentvec linux_sysvec = {
958 .sv_size = LINUX_SYS_MAXSYSCALL,
959 .sv_table = linux_sysent,
960 .sv_mask = 0,
961 .sv_sigsize = LINUX_SIGTBLSZ,
962 .sv_sigtbl = bsd_to_linux_signal,
963 .sv_errsize = ELAST + 1,
964 .sv_errtbl = bsd_to_linux_errno,
965 .sv_transtrap = translate_traps,
966 .sv_fixup = linux_fixup,
967 .sv_sendsig = linux_sendsig,
968 .sv_sigcode = &_binary_linux_locore_o_start,
969 .sv_szsigcode = &linux_szsigcode,
970 .sv_prepsyscall = NULL,
971 .sv_name = "Linux a.out",
972 .sv_coredump = NULL,
973 .sv_imgact_try = exec_linux_imgact_try,
974 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
975 .sv_pagesize = PAGE_SIZE,
976 .sv_minuser = VM_MIN_ADDRESS,
977 .sv_maxuser = VM_MAXUSER_ADDRESS,
978 .sv_usrstack = LINUX_USRSTACK,
979 .sv_psstrings = PS_STRINGS,
980 .sv_stackprot = VM_PROT_ALL,
981 .sv_copyout_strings = exec_copyout_strings,
982 .sv_setregs = exec_linux_setregs,
983 .sv_fixlimit = NULL,
984 .sv_maxssiz = NULL,
985 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
986 .sv_set_syscall_retval = cpu_set_syscall_retval,
987 .sv_fetch_syscall_args = linux_fetch_syscall_args,
988 .sv_syscallnames = NULL,
989 .sv_shared_page_base = LINUX_SHAREDPAGE,
990 .sv_shared_page_len = PAGE_SIZE,
991 .sv_schedtail = linux_schedtail,
992 .sv_thread_detach = linux_thread_detach,
993};
994INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
995
996struct sysentvec elf_linux_sysvec = {
997 .sv_size = LINUX_SYS_MAXSYSCALL,
998 .sv_table = linux_sysent,
999 .sv_mask = 0,
1000 .sv_sigsize = LINUX_SIGTBLSZ,
1001 .sv_sigtbl = bsd_to_linux_signal,
1002 .sv_errsize = ELAST + 1,
1003 .sv_errtbl = bsd_to_linux_errno,
1004 .sv_transtrap = translate_traps,
1005 .sv_fixup = elf_linux_fixup,
1006 .sv_sendsig = linux_sendsig,
1007 .sv_sigcode = &_binary_linux_locore_o_start,
1008 .sv_szsigcode = &linux_szsigcode,
1009 .sv_prepsyscall = NULL,
1010 .sv_name = "Linux ELF",
1011 .sv_coredump = elf32_coredump,
1012 .sv_imgact_try = exec_linux_imgact_try,
1013 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1014 .sv_pagesize = PAGE_SIZE,
1015 .sv_minuser = VM_MIN_ADDRESS,
1016 .sv_maxuser = VM_MAXUSER_ADDRESS,
1017 .sv_usrstack = LINUX_USRSTACK,
1018 .sv_psstrings = LINUX_PS_STRINGS,
1019 .sv_stackprot = VM_PROT_ALL,
1020 .sv_copyout_strings = linux_copyout_strings,
1021 .sv_setregs = exec_linux_setregs,
1022 .sv_fixlimit = NULL,
1023 .sv_maxssiz = NULL,
1024 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1025 .sv_set_syscall_retval = cpu_set_syscall_retval,
1026 .sv_fetch_syscall_args = linux_fetch_syscall_args,
1027 .sv_syscallnames = NULL,
1028 .sv_shared_page_base = LINUX_SHAREDPAGE,
1029 .sv_shared_page_len = PAGE_SIZE,
1030 .sv_schedtail = linux_schedtail,
1031 .sv_thread_detach = linux_thread_detach,
1032};
1033
1034static void
1035linux_vdso_install(void *param)
1036{
1037
1038 linux_szsigcode = (&_binary_linux_locore_o_end -
1039 &_binary_linux_locore_o_start);
1040
1041 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1042 panic("Linux invalid vdso size\n");
1043
1044 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1045
1046 linux_shared_page_obj = __elfN(linux_shared_page_init)
1047 (&linux_shared_page_mapping);
1048
1049 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE);
1050
1051 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1052 linux_szsigcode);
1053 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1054}
1055SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1056 (sysinit_cfunc_t)linux_vdso_install, NULL);
1057
1058static void
1059linux_vdso_deinstall(void *param)
1060{
1061
1062 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1063};
1064SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1065 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1066
1067static char GNU_ABI_VENDOR[] = "GNU";
1068static int GNULINUX_ABI_DESC = 0;
1069
1070static boolean_t
1071linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1072{
1073 const Elf32_Word *desc;
1074 uintptr_t p;
1075
1076 p = (uintptr_t)(note + 1);
1077 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1078
1079 desc = (const Elf32_Word *)p;
1080 if (desc[0] != GNULINUX_ABI_DESC)
1081 return (FALSE);
1082
1083 /*
1084 * For linux we encode osrel as follows (see linux_mib.c):
1085 * VVVMMMIII (version, major, minor), see linux_mib.c.
1086 */
1087 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1088
1089 return (TRUE);
1090}
1091
1092static Elf_Brandnote linux_brandnote = {
1093 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1094 .hdr.n_descsz = 16, /* XXX at least 16 */
1095 .hdr.n_type = 1,
1096 .vendor = GNU_ABI_VENDOR,
1097 .flags = BN_TRANSLATE_OSREL,
1098 .trans_osrel = linux_trans_osrel
1099};
1100
1101static Elf32_Brandinfo linux_brand = {
1102 .brand = ELFOSABI_LINUX,
1103 .machine = EM_386,
1104 .compat_3_brand = "Linux",
1105 .emul_path = "/compat/linux",
1106 .interp_path = "/lib/ld-linux.so.1",
1107 .sysvec = &elf_linux_sysvec,
1108 .interp_newpath = NULL,
1109 .brand_note = &linux_brandnote,
1110 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1111};
1112
1113static Elf32_Brandinfo linux_glibc2brand = {
1114 .brand = ELFOSABI_LINUX,
1115 .machine = EM_386,
1116 .compat_3_brand = "Linux",
1117 .emul_path = "/compat/linux",
1118 .interp_path = "/lib/ld-linux.so.2",
1119 .sysvec = &elf_linux_sysvec,
1120 .interp_newpath = NULL,
1121 .brand_note = &linux_brandnote,
1122 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1123};
1124
1125Elf32_Brandinfo *linux_brandlist[] = {
1126 &linux_brand,
1127 &linux_glibc2brand,
1128 NULL
1129};
1130
1131static int
1132linux_elf_modevent(module_t mod, int type, void *data)
1133{
1134 Elf32_Brandinfo **brandinfo;
1135 int error;
1136 struct linux_ioctl_handler **lihp;
1132 struct linux_device_handler **ldhp;
1133
1134 error = 0;
1135
1136 switch(type) {
1137 case MOD_LOAD:
1138 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1139 ++brandinfo)
1140 if (elf32_insert_brand_entry(*brandinfo) < 0)
1141 error = EINVAL;
1142 if (error == 0) {
1143 SET_FOREACH(lihp, linux_ioctl_handler_set)
1144 linux_ioctl_register_handler(*lihp);
1137
1138 error = 0;
1139
1140 switch(type) {
1141 case MOD_LOAD:
1142 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143 ++brandinfo)
1144 if (elf32_insert_brand_entry(*brandinfo) < 0)
1145 error = EINVAL;
1146 if (error == 0) {
1147 SET_FOREACH(lihp, linux_ioctl_handler_set)
1148 linux_ioctl_register_handler(*lihp);
1145 SET_FOREACH(ldhp, linux_device_handler_set)
1146 linux_device_register_handler(*ldhp);
1147 LIST_INIT(&futex_list);
1148 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1149 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1150 NULL, 1000);
1151 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1152 NULL, 1000);
1153 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1154 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1155 linux_get_machine(&linux_kplatform);
1156 linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1157 sizeof(char *));
1158 linux_osd_jail_register();
1159 stclohz = (stathz ? stathz : hz);
1160 if (bootverbose)
1161 printf("Linux ELF exec handler installed\n");
1162 } else
1163 printf("cannot insert Linux ELF brand handler\n");
1164 break;
1165 case MOD_UNLOAD:
1166 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1167 ++brandinfo)
1168 if (elf32_brand_inuse(*brandinfo))
1169 error = EBUSY;
1170 if (error == 0) {
1171 for (brandinfo = &linux_brandlist[0];
1172 *brandinfo != NULL; ++brandinfo)
1173 if (elf32_remove_brand_entry(*brandinfo) < 0)
1174 error = EINVAL;
1175 }
1176 if (error == 0) {
1177 SET_FOREACH(lihp, linux_ioctl_handler_set)
1178 linux_ioctl_unregister_handler(*lihp);
1149 LIST_INIT(&futex_list);
1150 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1151 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1152 NULL, 1000);
1153 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1154 NULL, 1000);
1155 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1156 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1157 linux_get_machine(&linux_kplatform);
1158 linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1159 sizeof(char *));
1160 linux_osd_jail_register();
1161 stclohz = (stathz ? stathz : hz);
1162 if (bootverbose)
1163 printf("Linux ELF exec handler installed\n");
1164 } else
1165 printf("cannot insert Linux ELF brand handler\n");
1166 break;
1167 case MOD_UNLOAD:
1168 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1169 ++brandinfo)
1170 if (elf32_brand_inuse(*brandinfo))
1171 error = EBUSY;
1172 if (error == 0) {
1173 for (brandinfo = &linux_brandlist[0];
1174 *brandinfo != NULL; ++brandinfo)
1175 if (elf32_remove_brand_entry(*brandinfo) < 0)
1176 error = EINVAL;
1177 }
1178 if (error == 0) {
1179 SET_FOREACH(lihp, linux_ioctl_handler_set)
1180 linux_ioctl_unregister_handler(*lihp);
1179 SET_FOREACH(ldhp, linux_device_handler_set)
1180 linux_device_unregister_handler(*ldhp);
1181 mtx_destroy(&futex_mtx);
1182 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1183 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1184 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1185 linux_osd_jail_deregister();
1186 if (bootverbose)
1187 printf("Linux ELF exec handler removed\n");
1188 } else
1189 printf("Could not deinstall ELF interpreter entry\n");
1190 break;
1191 default:
1192 return (EOPNOTSUPP);
1193 }
1194 return (error);
1195}
1196
1197static moduledata_t linux_elf_mod = {
1198 "linuxelf",
1199 linux_elf_modevent,
1200 0
1201};
1202
1203DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1181 mtx_destroy(&futex_mtx);
1182 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1183 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1184 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1185 linux_osd_jail_deregister();
1186 if (bootverbose)
1187 printf("Linux ELF exec handler removed\n");
1188 } else
1189 printf("Could not deinstall ELF interpreter entry\n");
1190 break;
1191 default:
1192 return (EOPNOTSUPP);
1193 }
1194 return (error);
1195}
1196
1197static moduledata_t linux_elf_mod = {
1198 "linuxelf",
1199 linux_elf_modevent,
1200 0
1201};
1202
1203DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);