Deleted Added
full compact
linux32_sysvec.c (283411) linux32_sysvec.c (283421)
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_sysvec.c 283411 2015-05-24 15:32:52Z dchagin $");
34__FBSDID("$FreeBSD: head/sys/amd64/linux32/linux32_sysvec.c 283421 2015-05-24 15:51:18Z dchagin $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define __ELF_WORD_SIZE 32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86#include <compat/linux/linux_vdso.h>
87
88MODULE_VERSION(linux, 1);
89
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define __ELF_WORD_SIZE 32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86#include <compat/linux/linux_vdso.h>
87
88MODULE_VERSION(linux, 1);
89
/* Defines the M_LINUX malloc type ("linux" / "Linux mode structures"). */
90MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
91
/*
 * Store one auxiliary-vector entry as two consecutive 32-bit words
 * (id, then val) with suword32(), advancing pos past both.  pos is
 * post-incremented inside the macro, so it must be a plain lvalue.
 */
92#define AUXARGS_ENTRY_32(pos, id, val) \
93 do { \
94 suword32(pos++, id); \
95 suword32(pos++, val); \
96 } while (0)
97
/*
 * The two bytes "#!" read as one 16-bit value; the constant depends on
 * the host byte order.
 */
98#if BYTE_ORDER == LITTLE_ENDIAN
99#define SHELLMAGIC 0x2123 /* #! */
100#else
101#define SHELLMAGIC 0x2321
102#endif
103
104/*
105 * Allow the sendsig functions to use the ldebug() facility
106 * even though they are not syscalls themselves. Map them
107 * to syscall 0. This is slightly less bogus than using
108 * ldebug(sigreturn).
109 */
110#define LINUX_SYS_linux_rt_sendsig 0
111#define LINUX_SYS_linux_sendsig 0
112
/*
 * File-level state and external symbols.
 * NOTE(review): the _binary_linux32_locore_o_* symbols presumably
 * bracket an embedded object image, and the shared-page variables are
 * presumably filled in by the vdso install hook -- neither is visible
 * in this part of the file; confirm.
 */
113const char *linux_kplatform;
114static int linux_szsigcode;
115static vm_object_t linux_shared_page_obj;
116static char *linux_shared_page_mapping;
117extern char _binary_linux32_locore_o_start;
118extern char _binary_linux32_locore_o_end;
119
/* System call table, defined in another translation unit. */
120extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
121
122SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
90#define AUXARGS_ENTRY_32(pos, id, val) \
91 do { \
92 suword32(pos++, id); \
93 suword32(pos++, val); \
94 } while (0)
95
96#if BYTE_ORDER == LITTLE_ENDIAN
97#define SHELLMAGIC 0x2123 /* #! */
98#else
99#define SHELLMAGIC 0x2321
100#endif
101
102/*
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
106 * ldebug(sigreturn).
107 */
108#define LINUX_SYS_linux_rt_sendsig 0
109#define LINUX_SYS_linux_sendsig 0
110
111const char *linux_kplatform;
112static int linux_szsigcode;
113static vm_object_t linux_shared_page_obj;
114static char *linux_shared_page_mapping;
115extern char _binary_linux32_locore_o_start;
116extern char _binary_linux32_locore_o_end;
117
118extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
119
120SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
123SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
124
/* Forward declarations of file-local (static) functions. */
125static int elf_linux_fixup(register_t **stack_base,
126 struct image_params *iparams);
127static register_t *linux_copyout_strings(struct image_params *imgp);
128static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
129static void exec_linux_setregs(struct thread *td,
130 struct image_params *imgp, u_long stack);
131static void linux32_fixlimit(struct rlimit *rl, int which);
132static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
133static void linux_vdso_install(void *param);
134static void linux_vdso_deinstall(void *param);
135
/*
 * Event handler tags.
 * NOTE(review): presumably saved at registration time so the handlers
 * can be deregistered later -- the registration code is not visible here.
 */
136static eventhandler_tag linux_exit_tag;
137static eventhandler_tag linux_exec_tag;
138static eventhandler_tag linux_thread_dtor_tag;
139
140/*
141 * Linux syscalls return negative errno's, we do positive and map them
142 * Reference:
143 * FreeBSD: src/sys/sys/errno.h
144 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
145 * linux-2.6.17.8/include/asm-generic/errno.h
146 */
147static int bsd_to_linux_errno[ELAST + 1] = {
148 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
149 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
150 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
151 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
152 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
153 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
154 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
155 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
156 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
157 -72, -67, -71
158};
159
160int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
161 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
162 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
163 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
164 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
165 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
166 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
167 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
168 0, LINUX_SIGUSR1, LINUX_SIGUSR2
169};
170
171int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
172 SIGHUP, SIGINT, SIGQUIT, SIGILL,
173 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
174 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
175 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
176 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
177 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
178 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
179 SIGIO, SIGURG, SIGSYS
180};
181
/* Value reported for any FreeBSD trap number that has no table entry. */
#define	LINUX_T_UNKNOWN		255

/*
 * FreeBSD trap number (T_*) -> Linux trap number.  The table is indexed
 * by the FreeBSD value; slots without a named T_* constant hold
 * LINUX_T_UNKNOWN.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap number into the Linux one.
 *
 * Returns the table entry for 'code', or LINUX_T_UNKNOWN when 'code' is
 * negative or beyond the end of the table.  This is a function rather
 * than a macro so that the argument is evaluated exactly once and the
 * range check does not depend on an implicit signed-to-unsigned
 * conversion.
 */
static inline int
bsd_to_linux_trapcode(int code)
{
	const int nentries = (int)(sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]));

	if (code < 0 || code >= nentries)
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
220
221struct linux32_ps_strings {
222 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
223 u_int ps_nargvstr; /* the number of argument strings */
224 u_int32_t ps_envstr; /* first of 0 or more environment strings */
225 u_int ps_nenvstr; /* the number of environment strings */
226};
227
228LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
229LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
230LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
231LINUX_VDSO_SYM_CHAR(linux_platform);
232
233/*
234 * If FreeBSD & Linux have a difference of opinion about what a trap
235 * means, deal with it here.
236 *
237 * MPSAFE
238 */
239static int
240translate_traps(int signal, int trap_code)
241{
242 if (signal != SIGBUS)
243 return signal;
244 switch (trap_code) {
245 case T_PROTFLT:
246 case T_TSSFLT:
247 case T_DOUBLEFLT:
248 case T_PAGEFLT:
249 return SIGSEGV;
250 default:
251 return signal;
252 }
253}
254
255static int
256elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
257{
258 Elf32_Auxargs *args;
259 Elf32_Addr *base;
260 Elf32_Addr *pos;
261 struct linux32_ps_strings *arginfo;
262
263 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
264
265 KASSERT(curthread->td_proc == imgp->proc,
266 ("unsafe elf_linux_fixup(), should be curproc"));
267 base = (Elf32_Addr *)*stack_base;
268 args = (Elf32_Auxargs *)imgp->auxargs;
269 pos = base + (imgp->args->argc + imgp->args->envc + 2);
270
271 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
272 imgp->proc->p_sysent->sv_shared_page_base);
273 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
274 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
275
276 /*
277 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
278 * as it has appeared in the 2.4.0-rc7 first time.
279 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
280 * glibc falls back to the hard-coded CLK_TCK value when aux entry
281 * is not present.
282 * Also see linux_times() implementation.
283 */
284 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
285 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
286 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
287 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
288 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
289 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
290 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
291 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
292 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
293 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
294 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
295 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
296 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
297 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
298 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
299 if (args->execfd != -1)
300 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
301 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
302
303 free(imgp->auxargs, M_TEMP);
304 imgp->auxargs = NULL;
305
306 base--;
307 suword32(base, (uint32_t)imgp->args->argc);
308 *stack_base = (register_t *)base;
309 return (0);
310}
311
312static void
313linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
314{
315 struct thread *td = curthread;
316 struct proc *p = td->td_proc;
317 struct sigacts *psp;
318 struct trapframe *regs;
319 struct l_rt_sigframe *fp, frame;
320 int oonstack;
321 int sig;
322 int code;
323
324 sig = ksi->ksi_signo;
325 code = ksi->ksi_code;
326 PROC_LOCK_ASSERT(p, MA_OWNED);
327 psp = p->p_sigacts;
328 mtx_assert(&psp->ps_mtx, MA_OWNED);
329 regs = td->td_frame;
330 oonstack = sigonstack(regs->tf_rsp);
331
332#ifdef DEBUG
333 if (ldebug(rt_sendsig))
334 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
335 catcher, sig, (void*)mask, code);
336#endif
337 /*
338 * Allocate space for the signal handler context.
339 */
340 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
341 SIGISMEMBER(psp->ps_sigonstack, sig)) {
342 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
343 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
344 } else
345 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
346 mtx_unlock(&psp->ps_mtx);
347
348 /*
349 * Build the argument list for the signal handler.
350 */
351 if (p->p_sysent->sv_sigtbl)
352 if (sig <= p->p_sysent->sv_sigsize)
353 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
354
355 bzero(&frame, sizeof(frame));
356
357 frame.sf_handler = PTROUT(catcher);
358 frame.sf_sig = sig;
359 frame.sf_siginfo = PTROUT(&fp->sf_si);
360 frame.sf_ucontext = PTROUT(&fp->sf_sc);
361
362 /* Fill in POSIX parts */
363 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
364
365 /*
366 * Build the signal context to be used by sigreturn
367 * and libgcc unwind.
368 */
369 frame.sf_sc.uc_flags = 0; /* XXX ??? */
370 frame.sf_sc.uc_link = 0; /* XXX ??? */
371
372 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
373 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
374 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
375 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
376 PROC_UNLOCK(p);
377
378 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
379
380 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
381 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
382 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
383 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
384 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
385 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp;
386 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
387 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
388 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
389 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
390 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
391 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
392 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
393 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
394 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
395 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
396 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
397 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
398 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
399 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
400 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
401
402#ifdef DEBUG
403 if (ldebug(rt_sendsig))
404 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
405 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
406 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
407#endif
408
409 if (copyout(&frame, fp, sizeof(frame)) != 0) {
410 /*
411 * Process has trashed its stack; give it an illegal
412 * instruction to halt it in its tracks.
413 */
414#ifdef DEBUG
415 if (ldebug(rt_sendsig))
416 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
417 fp, oonstack);
418#endif
419 PROC_LOCK(p);
420 sigexit(td, SIGILL);
421 }
422
423 /*
424 * Build context to run handler in.
425 */
426 regs->tf_rsp = PTROUT(fp);
427 regs->tf_rip = linux32_rt_sigcode;
428 regs->tf_rflags &= ~(PSL_T | PSL_D);
429 regs->tf_cs = _ucode32sel;
430 regs->tf_ss = _udatasel;
431 regs->tf_ds = _udatasel;
432 regs->tf_es = _udatasel;
433 regs->tf_fs = _ufssel;
434 regs->tf_gs = _ugssel;
435 regs->tf_flags = TF_HASSEGS;
436 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
437 PROC_LOCK(p);
438 mtx_lock(&psp->ps_mtx);
439}
440
441
442/*
443 * Send an interrupt to process.
444 *
445 * Stack is set up to allow sigcode stored
446 * in u. to call routine, followed by kcall
447 * to sigreturn routine below. After sigreturn
448 * resets the signal mask, the stack, and the
449 * frame pointer, it returns to the user
450 * specified pc, psl.
451 */
452static void
453linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
454{
455 struct thread *td = curthread;
456 struct proc *p = td->td_proc;
457 struct sigacts *psp;
458 struct trapframe *regs;
459 struct l_sigframe *fp, frame;
460 l_sigset_t lmask;
461 int oonstack, i;
462 int sig, code;
463
464 sig = ksi->ksi_signo;
465 code = ksi->ksi_code;
466 PROC_LOCK_ASSERT(p, MA_OWNED);
467 psp = p->p_sigacts;
468 mtx_assert(&psp->ps_mtx, MA_OWNED);
469 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
470 /* Signal handler installed with SA_SIGINFO. */
471 linux_rt_sendsig(catcher, ksi, mask);
472 return;
473 }
474
475 regs = td->td_frame;
476 oonstack = sigonstack(regs->tf_rsp);
477
478#ifdef DEBUG
479 if (ldebug(sendsig))
480 printf(ARGS(sendsig, "%p, %d, %p, %u"),
481 catcher, sig, (void*)mask, code);
482#endif
483
484 /*
485 * Allocate space for the signal handler context.
486 */
487 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
488 SIGISMEMBER(psp->ps_sigonstack, sig)) {
489 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
490 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
491 } else
492 fp = (struct l_sigframe *)regs->tf_rsp - 1;
493 mtx_unlock(&psp->ps_mtx);
494 PROC_UNLOCK(p);
495
496 /*
497 * Build the argument list for the signal handler.
498 */
499 if (p->p_sysent->sv_sigtbl)
500 if (sig <= p->p_sysent->sv_sigsize)
501 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
502
503 bzero(&frame, sizeof(frame));
504
505 frame.sf_handler = PTROUT(catcher);
506 frame.sf_sig = sig;
507
508 bsd_to_linux_sigset(mask, &lmask);
509
510 /*
511 * Build the signal context to be used by sigreturn.
512 */
513 frame.sf_sc.sc_mask = lmask.__bits[0];
514 frame.sf_sc.sc_gs = regs->tf_gs;
515 frame.sf_sc.sc_fs = regs->tf_fs;
516 frame.sf_sc.sc_es = regs->tf_es;
517 frame.sf_sc.sc_ds = regs->tf_ds;
518 frame.sf_sc.sc_edi = regs->tf_rdi;
519 frame.sf_sc.sc_esi = regs->tf_rsi;
520 frame.sf_sc.sc_ebp = regs->tf_rbp;
521 frame.sf_sc.sc_ebx = regs->tf_rbx;
522 frame.sf_sc.sc_esp = regs->tf_rsp;
523 frame.sf_sc.sc_edx = regs->tf_rdx;
524 frame.sf_sc.sc_ecx = regs->tf_rcx;
525 frame.sf_sc.sc_eax = regs->tf_rax;
526 frame.sf_sc.sc_eip = regs->tf_rip;
527 frame.sf_sc.sc_cs = regs->tf_cs;
528 frame.sf_sc.sc_eflags = regs->tf_rflags;
529 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
530 frame.sf_sc.sc_ss = regs->tf_ss;
531 frame.sf_sc.sc_err = regs->tf_err;
532 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
533 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
534
535 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
536 frame.sf_extramask[i] = lmask.__bits[i+1];
537
538 if (copyout(&frame, fp, sizeof(frame)) != 0) {
539 /*
540 * Process has trashed its stack; give it an illegal
541 * instruction to halt it in its tracks.
542 */
543 PROC_LOCK(p);
544 sigexit(td, SIGILL);
545 }
546
547 /*
548 * Build context to run handler in.
549 */
550 regs->tf_rsp = PTROUT(fp);
551 regs->tf_rip = linux32_sigcode;
552 regs->tf_rflags &= ~(PSL_T | PSL_D);
553 regs->tf_cs = _ucode32sel;
554 regs->tf_ss = _udatasel;
555 regs->tf_ds = _udatasel;
556 regs->tf_es = _udatasel;
557 regs->tf_fs = _ufssel;
558 regs->tf_gs = _ugssel;
559 regs->tf_flags = TF_HASSEGS;
560 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
561 PROC_LOCK(p);
562 mtx_lock(&psp->ps_mtx);
563}
564
565/*
566 * System call to cleanup state after a signal
567 * has been taken. Reset signal mask and
568 * stack state from context left by sendsig (above).
569 * Return to previous pc and psl as specified by
570 * context left by sendsig. Check carefully to
571 * make sure that the user has not modified the
572 * psl to gain improper privileges or to cause
573 * a machine fault.
574 */
575int
576linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
577{
578 struct l_sigframe frame;
579 struct trapframe *regs;
580 sigset_t bmask;
581 l_sigset_t lmask;
582 int eflags, i;
583 ksiginfo_t ksi;
584
585 regs = td->td_frame;
586
587#ifdef DEBUG
588 if (ldebug(sigreturn))
589 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
590#endif
591 /*
592 * The trampoline code hands us the sigframe.
593 * It is unsafe to keep track of it ourselves, in the event that a
594 * program jumps out of a signal handler.
595 */
596 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
597 return (EFAULT);
598
599 /*
600 * Check for security violations.
601 */
602#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
603 eflags = frame.sf_sc.sc_eflags;
604 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
605 return(EINVAL);
606
607 /*
608 * Don't allow users to load a valid privileged %cs. Let the
609 * hardware check for invalid selectors, excess privilege in
610 * other selectors, invalid %eip's and invalid %esp's.
611 */
612#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
613 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
614 ksiginfo_init_trap(&ksi);
615 ksi.ksi_signo = SIGBUS;
616 ksi.ksi_code = BUS_OBJERR;
617 ksi.ksi_trapno = T_PROTFLT;
618 ksi.ksi_addr = (void *)regs->tf_rip;
619 trapsignal(td, &ksi);
620 return(EINVAL);
621 }
622
623 lmask.__bits[0] = frame.sf_sc.sc_mask;
624 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
625 lmask.__bits[i+1] = frame.sf_extramask[i];
626 linux_to_bsd_sigset(&lmask, &bmask);
627 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
628
629 /*
630 * Restore signal context.
631 */
632 regs->tf_rdi = frame.sf_sc.sc_edi;
633 regs->tf_rsi = frame.sf_sc.sc_esi;
634 regs->tf_rbp = frame.sf_sc.sc_ebp;
635 regs->tf_rbx = frame.sf_sc.sc_ebx;
636 regs->tf_rdx = frame.sf_sc.sc_edx;
637 regs->tf_rcx = frame.sf_sc.sc_ecx;
638 regs->tf_rax = frame.sf_sc.sc_eax;
639 regs->tf_rip = frame.sf_sc.sc_eip;
640 regs->tf_cs = frame.sf_sc.sc_cs;
641 regs->tf_ds = frame.sf_sc.sc_ds;
642 regs->tf_es = frame.sf_sc.sc_es;
643 regs->tf_fs = frame.sf_sc.sc_fs;
644 regs->tf_gs = frame.sf_sc.sc_gs;
645 regs->tf_rflags = eflags;
646 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
647 regs->tf_ss = frame.sf_sc.sc_ss;
648 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
649
650 return (EJUSTRETURN);
651}
652
653/*
654 * System call to cleanup state after a signal
655 * has been taken. Reset signal mask and
656 * stack state from context left by rt_sendsig (above).
657 * Return to previous pc and psl as specified by
658 * context left by sendsig. Check carefully to
659 * make sure that the user has not modified the
660 * psl to gain improper privileges or to cause
661 * a machine fault.
662 */
663int
664linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
665{
666 struct l_ucontext uc;
667 struct l_sigcontext *context;
668 sigset_t bmask;
669 l_stack_t *lss;
670 stack_t ss;
671 struct trapframe *regs;
672 int eflags;
673 ksiginfo_t ksi;
674
675 regs = td->td_frame;
676
677#ifdef DEBUG
678 if (ldebug(rt_sigreturn))
679 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
680#endif
681 /*
682 * The trampoline code hands us the ucontext.
683 * It is unsafe to keep track of it ourselves, in the event that a
684 * program jumps out of a signal handler.
685 */
686 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
687 return (EFAULT);
688
689 context = &uc.uc_mcontext;
690
691 /*
692 * Check for security violations.
693 */
694#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
695 eflags = context->sc_eflags;
696 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
697 return(EINVAL);
698
699 /*
700 * Don't allow users to load a valid privileged %cs. Let the
701 * hardware check for invalid selectors, excess privilege in
702 * other selectors, invalid %eip's and invalid %esp's.
703 */
704#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
705 if (!CS_SECURE(context->sc_cs)) {
706 ksiginfo_init_trap(&ksi);
707 ksi.ksi_signo = SIGBUS;
708 ksi.ksi_code = BUS_OBJERR;
709 ksi.ksi_trapno = T_PROTFLT;
710 ksi.ksi_addr = (void *)regs->tf_rip;
711 trapsignal(td, &ksi);
712 return(EINVAL);
713 }
714
715 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
716 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
717
718 /*
719 * Restore signal context
720 */
721 regs->tf_gs = context->sc_gs;
722 regs->tf_fs = context->sc_fs;
723 regs->tf_es = context->sc_es;
724 regs->tf_ds = context->sc_ds;
725 regs->tf_rdi = context->sc_edi;
726 regs->tf_rsi = context->sc_esi;
727 regs->tf_rbp = context->sc_ebp;
728 regs->tf_rbx = context->sc_ebx;
729 regs->tf_rdx = context->sc_edx;
730 regs->tf_rcx = context->sc_ecx;
731 regs->tf_rax = context->sc_eax;
732 regs->tf_rip = context->sc_eip;
733 regs->tf_cs = context->sc_cs;
734 regs->tf_rflags = eflags;
735 regs->tf_rsp = context->sc_esp_at_signal;
736 regs->tf_ss = context->sc_ss;
737 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
738
739 /*
740 * call sigaltstack & ignore results..
741 */
742 lss = &uc.uc_stack;
743 ss.ss_sp = PTRIN(lss->ss_sp);
744 ss.ss_size = lss->ss_size;
745 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
746
747#ifdef DEBUG
748 if (ldebug(rt_sigreturn))
749 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
750 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
751#endif
752 (void)kern_sigaltstack(td, &ss, NULL);
753
754 return (EJUSTRETURN);
755}
756
757static int
758linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
759{
760 struct proc *p;
761 struct trapframe *frame;
762
763 p = td->td_proc;
764 frame = td->td_frame;
765
766 sa->args[0] = frame->tf_rbx;
767 sa->args[1] = frame->tf_rcx;
768 sa->args[2] = frame->tf_rdx;
769 sa->args[3] = frame->tf_rsi;
770 sa->args[4] = frame->tf_rdi;
771 sa->args[5] = frame->tf_rbp; /* Unconfirmed */
772 sa->code = frame->tf_rax;
773
774 if (sa->code >= p->p_sysent->sv_size)
775 sa->callp = &p->p_sysent->sv_table[0];
776 else
777 sa->callp = &p->p_sysent->sv_table[sa->code];
778 sa->narg = sa->callp->sy_narg;
779
780 td->td_retval[0] = 0;
781 td->td_retval[1] = frame->tf_rdx;
782
783 return (0);
784}
785
786/*
787 * If a linux binary is exec'ing something, try this image activator
788 * first. We override standard shell script execution in order to
789 * be able to modify the interpreter path. We only do this if a linux
790 * binary is doing the exec, so we do not create an EXEC module for it.
791 */
792static int exec_linux_imgact_try(struct image_params *iparams);
793
794static int
795exec_linux_imgact_try(struct image_params *imgp)
796{
797 const char *head = (const char *)imgp->image_header;
798 char *rpath;
799 int error = -1;
800
801 /*
802 * The interpreter for shell scripts run from a linux binary needs
803 * to be located in /compat/linux if possible in order to recursively
804 * maintain linux path emulation.
805 */
806 if (((const short *)head)[0] == SHELLMAGIC) {
807 /*
808 * Run our normal shell image activator. If it succeeds attempt
809 * to use the alternate path for the interpreter. If an
810 * alternate * path is found, use our stringspace to store it.
811 */
812 if ((error = exec_shell_imgact(imgp)) == 0) {
813 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
814 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
815 AT_FDCWD);
816 if (rpath != NULL)
817 imgp->args->fname_buf =
818 imgp->interpreter_name = rpath;
819 }
820 }
821 return (error);
822}
823
824/*
825 * Clear registers on exec
826 * XXX copied from ia32_signal.c.
827 */
828static void
829exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
830{
831 struct trapframe *regs = td->td_frame;
832 struct pcb *pcb = td->td_pcb;
833
834 mtx_lock(&dt_lock);
835 if (td->td_proc->p_md.md_ldt != NULL)
836 user_ldt_free(td);
837 else
838 mtx_unlock(&dt_lock);
839
840 critical_enter();
841 wrmsr(MSR_FSBASE, 0);
842 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
843 pcb->pcb_fsbase = 0;
844 pcb->pcb_gsbase = 0;
845 critical_exit();
846 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
847
848 bzero((char *)regs, sizeof(struct trapframe));
849 regs->tf_rip = imgp->entry_addr;
850 regs->tf_rsp = stack;
851 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
852 regs->tf_gs = _ugssel;
853 regs->tf_fs = _ufssel;
854 regs->tf_es = _udatasel;
855 regs->tf_ds = _udatasel;
856 regs->tf_ss = _udatasel;
857 regs->tf_flags = TF_HASSEGS;
858 regs->tf_cs = _ucode32sel;
859 regs->tf_rbx = imgp->ps_strings;
860
861 fpstate_drop(td);
862
863 /* Do full restore on return so that we can change to a different %cs */
864 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
865 td->td_retval[1] = 0;
866}
867
868/*
869 * XXX copied from ia32_sysvec.c.
870 */
871static register_t *
872linux_copyout_strings(struct image_params *imgp)
873{
874 int argc, envc;
875 u_int32_t *vectp;
876 char *stringp, *destp;
877 u_int32_t *stack_base;
878 struct linux32_ps_strings *arginfo;
879
880 /*
881 * Calculate string base and vector table pointers.
882 */
883 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
884 destp = (caddr_t)arginfo - SPARE_USRSPACE -
885 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
886
887 /*
888 * If we have a valid auxargs ptr, prepare some room
889 * on the stack.
890 */
891 if (imgp->auxargs) {
892 /*
893 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
894 * lower compatibility.
895 */
896 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
897 (LINUX_AT_COUNT * 2);
898 /*
899 * The '+ 2' is for the null pointers at the end of each of
900 * the arg and env vector sets,and imgp->auxarg_size is room
901 * for argument of Runtime loader.
902 */
903 vectp = (u_int32_t *) (destp - (imgp->args->argc +
904 imgp->args->envc + 2 + imgp->auxarg_size) *
905 sizeof(u_int32_t));
906
907 } else
908 /*
909 * The '+ 2' is for the null pointers at the end of each of
910 * the arg and env vector sets
911 */
912 vectp = (u_int32_t *)(destp - (imgp->args->argc +
913 imgp->args->envc + 2) * sizeof(u_int32_t));
914
915 /*
916 * vectp also becomes our initial stack base
917 */
918 stack_base = vectp;
919
920 stringp = imgp->args->begin_argv;
921 argc = imgp->args->argc;
922 envc = imgp->args->envc;
923 /*
924 * Copy out strings - arguments and environment.
925 */
926 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
927
928 /*
929 * Fill in "ps_strings" struct for ps, w, etc.
930 */
931 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
932 suword32(&arginfo->ps_nargvstr, argc);
933
934 /*
935 * Fill in argument portion of vector table.
936 */
937 for (; argc > 0; --argc) {
938 suword32(vectp++, (uint32_t)(intptr_t)destp);
939 while (*stringp++ != 0)
940 destp++;
941 destp++;
942 }
943
944 /* a null vector table pointer separates the argp's from the envp's */
945 suword32(vectp++, 0);
946
947 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
948 suword32(&arginfo->ps_nenvstr, envc);
949
950 /*
951 * Fill in environment portion of vector table.
952 */
953 for (; envc > 0; --envc) {
954 suword32(vectp++, (uint32_t)(intptr_t)destp);
955 while (*stringp++ != 0)
956 destp++;
957 destp++;
958 }
959
960 /* end of vector table is a null pointer */
961 suword32(vectp, 0);
962
963 return ((register_t *)stack_base);
964}
965
966static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
967 "32-bit Linux emulation");
968
969static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
970SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
971 &linux32_maxdsiz, 0, "");
972static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
973SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
974 &linux32_maxssiz, 0, "");
975static u_long linux32_maxvmem = LINUX32_MAXVMEM;
976SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
977 &linux32_maxvmem, 0, "");
978
121
122static int elf_linux_fixup(register_t **stack_base,
123 struct image_params *iparams);
124static register_t *linux_copyout_strings(struct image_params *imgp);
125static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126static void exec_linux_setregs(struct thread *td,
127 struct image_params *imgp, u_long stack);
128static void linux32_fixlimit(struct rlimit *rl, int which);
129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130static void linux_vdso_install(void *param);
131static void linux_vdso_deinstall(void *param);
132
133static eventhandler_tag linux_exit_tag;
134static eventhandler_tag linux_exec_tag;
135static eventhandler_tag linux_thread_dtor_tag;
136
137/*
138 * Linux syscalls return negative errno's, we do positive and map them
139 * Reference:
140 * FreeBSD: src/sys/sys/errno.h
141 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
142 * linux-2.6.17.8/include/asm-generic/errno.h
143 */
144static int bsd_to_linux_errno[ELAST + 1] = {
145 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
146 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
147 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
148 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
149 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
150 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
151 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
152 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
153 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
154 -72, -67, -71
155};
156
157int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
158 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
159 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
160 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
161 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
162 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
163 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
164 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
165 0, LINUX_SIGUSR1, LINUX_SIGUSR2
166};
167
168int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
169 SIGHUP, SIGINT, SIGQUIT, SIGILL,
170 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
171 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
172 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
173 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
174 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
175 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
176 SIGIO, SIGURG, SIGSYS
177};
178
/* Value reported for any FreeBSD trap code without a Linux equivalent. */
#define	LINUX_T_UNKNOWN	255

/*
 * Trap code translation table, indexed by the FreeBSD trap code and
 * yielding the trap number stored in the Linux signal context.  The table
 * is only ever read, so it is const-qualified.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code; anything outside the table (including a
 * negative value, which the explicit size_t conversion turns into a huge
 * index) maps to LINUX_T_UNKNOWN.  The argument is evaluated twice, so it
 * must not have side effects.
 */
#define	bsd_to_linux_trapcode(code)					\
	((size_t)(code) < sizeof(_bsd_to_linux_trapcode) /		\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
217
218struct linux32_ps_strings {
219 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
220 u_int ps_nargvstr; /* the number of argument strings */
221 u_int32_t ps_envstr; /* first of 0 or more environment strings */
222 u_int ps_nenvstr; /* the number of environment strings */
223};
224
/*
 * vDSO symbols referenced by this file: the two signal trampolines used as
 * handler entry points, the vsyscall entry exported through the auxiliary
 * vector, and the platform string.
 * NOTE(review): the declarations come from the LINUX_VDSO_SYM_* macros,
 * whose expansion is not visible here.
 */
LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
LINUX_VDSO_SYM_CHAR(linux_platform);
229
230/*
231 * If FreeBSD & Linux have a difference of opinion about what a trap
232 * means, deal with it here.
233 *
234 * MPSAFE
235 */
236static int
237translate_traps(int signal, int trap_code)
238{
239 if (signal != SIGBUS)
240 return signal;
241 switch (trap_code) {
242 case T_PROTFLT:
243 case T_TSSFLT:
244 case T_DOUBLEFLT:
245 case T_PAGEFLT:
246 return SIGSEGV;
247 default:
248 return signal;
249 }
250}
251
252static int
253elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
254{
255 Elf32_Auxargs *args;
256 Elf32_Addr *base;
257 Elf32_Addr *pos;
258 struct linux32_ps_strings *arginfo;
259
260 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
261
262 KASSERT(curthread->td_proc == imgp->proc,
263 ("unsafe elf_linux_fixup(), should be curproc"));
264 base = (Elf32_Addr *)*stack_base;
265 args = (Elf32_Auxargs *)imgp->auxargs;
266 pos = base + (imgp->args->argc + imgp->args->envc + 2);
267
268 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
269 imgp->proc->p_sysent->sv_shared_page_base);
270 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
271 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
272
273 /*
274 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
275 * as it has appeared in the 2.4.0-rc7 first time.
276 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
277 * glibc falls back to the hard-coded CLK_TCK value when aux entry
278 * is not present.
279 * Also see linux_times() implementation.
280 */
281 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
282 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
283 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
284 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
285 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
286 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
287 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
288 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
289 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
290 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
291 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
292 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
293 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
294 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
295 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
296 if (args->execfd != -1)
297 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
298 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
299
300 free(imgp->auxargs, M_TEMP);
301 imgp->auxargs = NULL;
302
303 base--;
304 suword32(base, (uint32_t)imgp->args->argc);
305 *stack_base = (register_t *)base;
306 return (0);
307}
308
309static void
310linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
311{
312 struct thread *td = curthread;
313 struct proc *p = td->td_proc;
314 struct sigacts *psp;
315 struct trapframe *regs;
316 struct l_rt_sigframe *fp, frame;
317 int oonstack;
318 int sig;
319 int code;
320
321 sig = ksi->ksi_signo;
322 code = ksi->ksi_code;
323 PROC_LOCK_ASSERT(p, MA_OWNED);
324 psp = p->p_sigacts;
325 mtx_assert(&psp->ps_mtx, MA_OWNED);
326 regs = td->td_frame;
327 oonstack = sigonstack(regs->tf_rsp);
328
329#ifdef DEBUG
330 if (ldebug(rt_sendsig))
331 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
332 catcher, sig, (void*)mask, code);
333#endif
334 /*
335 * Allocate space for the signal handler context.
336 */
337 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
338 SIGISMEMBER(psp->ps_sigonstack, sig)) {
339 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
340 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
341 } else
342 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
343 mtx_unlock(&psp->ps_mtx);
344
345 /*
346 * Build the argument list for the signal handler.
347 */
348 if (p->p_sysent->sv_sigtbl)
349 if (sig <= p->p_sysent->sv_sigsize)
350 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
351
352 bzero(&frame, sizeof(frame));
353
354 frame.sf_handler = PTROUT(catcher);
355 frame.sf_sig = sig;
356 frame.sf_siginfo = PTROUT(&fp->sf_si);
357 frame.sf_ucontext = PTROUT(&fp->sf_sc);
358
359 /* Fill in POSIX parts */
360 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
361
362 /*
363 * Build the signal context to be used by sigreturn
364 * and libgcc unwind.
365 */
366 frame.sf_sc.uc_flags = 0; /* XXX ??? */
367 frame.sf_sc.uc_link = 0; /* XXX ??? */
368
369 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
370 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
371 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
372 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
373 PROC_UNLOCK(p);
374
375 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
376
377 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
378 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
379 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
380 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
381 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
382 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp;
383 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
384 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
385 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
386 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
387 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
388 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
389 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
390 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
391 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
392 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
393 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
394 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
395 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
396 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
397 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
398
399#ifdef DEBUG
400 if (ldebug(rt_sendsig))
401 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
402 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
403 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
404#endif
405
406 if (copyout(&frame, fp, sizeof(frame)) != 0) {
407 /*
408 * Process has trashed its stack; give it an illegal
409 * instruction to halt it in its tracks.
410 */
411#ifdef DEBUG
412 if (ldebug(rt_sendsig))
413 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
414 fp, oonstack);
415#endif
416 PROC_LOCK(p);
417 sigexit(td, SIGILL);
418 }
419
420 /*
421 * Build context to run handler in.
422 */
423 regs->tf_rsp = PTROUT(fp);
424 regs->tf_rip = linux32_rt_sigcode;
425 regs->tf_rflags &= ~(PSL_T | PSL_D);
426 regs->tf_cs = _ucode32sel;
427 regs->tf_ss = _udatasel;
428 regs->tf_ds = _udatasel;
429 regs->tf_es = _udatasel;
430 regs->tf_fs = _ufssel;
431 regs->tf_gs = _ugssel;
432 regs->tf_flags = TF_HASSEGS;
433 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
434 PROC_LOCK(p);
435 mtx_lock(&psp->ps_mtx);
436}
437
438
439/*
440 * Send an interrupt to process.
441 *
442 * Stack is set up to allow sigcode stored
443 * in u. to call routine, followed by kcall
444 * to sigreturn routine below. After sigreturn
445 * resets the signal mask, the stack, and the
446 * frame pointer, it returns to the user
447 * specified pc, psl.
448 */
449static void
450linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
451{
452 struct thread *td = curthread;
453 struct proc *p = td->td_proc;
454 struct sigacts *psp;
455 struct trapframe *regs;
456 struct l_sigframe *fp, frame;
457 l_sigset_t lmask;
458 int oonstack, i;
459 int sig, code;
460
461 sig = ksi->ksi_signo;
462 code = ksi->ksi_code;
463 PROC_LOCK_ASSERT(p, MA_OWNED);
464 psp = p->p_sigacts;
465 mtx_assert(&psp->ps_mtx, MA_OWNED);
466 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
467 /* Signal handler installed with SA_SIGINFO. */
468 linux_rt_sendsig(catcher, ksi, mask);
469 return;
470 }
471
472 regs = td->td_frame;
473 oonstack = sigonstack(regs->tf_rsp);
474
475#ifdef DEBUG
476 if (ldebug(sendsig))
477 printf(ARGS(sendsig, "%p, %d, %p, %u"),
478 catcher, sig, (void*)mask, code);
479#endif
480
481 /*
482 * Allocate space for the signal handler context.
483 */
484 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
485 SIGISMEMBER(psp->ps_sigonstack, sig)) {
486 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
487 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
488 } else
489 fp = (struct l_sigframe *)regs->tf_rsp - 1;
490 mtx_unlock(&psp->ps_mtx);
491 PROC_UNLOCK(p);
492
493 /*
494 * Build the argument list for the signal handler.
495 */
496 if (p->p_sysent->sv_sigtbl)
497 if (sig <= p->p_sysent->sv_sigsize)
498 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
499
500 bzero(&frame, sizeof(frame));
501
502 frame.sf_handler = PTROUT(catcher);
503 frame.sf_sig = sig;
504
505 bsd_to_linux_sigset(mask, &lmask);
506
507 /*
508 * Build the signal context to be used by sigreturn.
509 */
510 frame.sf_sc.sc_mask = lmask.__bits[0];
511 frame.sf_sc.sc_gs = regs->tf_gs;
512 frame.sf_sc.sc_fs = regs->tf_fs;
513 frame.sf_sc.sc_es = regs->tf_es;
514 frame.sf_sc.sc_ds = regs->tf_ds;
515 frame.sf_sc.sc_edi = regs->tf_rdi;
516 frame.sf_sc.sc_esi = regs->tf_rsi;
517 frame.sf_sc.sc_ebp = regs->tf_rbp;
518 frame.sf_sc.sc_ebx = regs->tf_rbx;
519 frame.sf_sc.sc_esp = regs->tf_rsp;
520 frame.sf_sc.sc_edx = regs->tf_rdx;
521 frame.sf_sc.sc_ecx = regs->tf_rcx;
522 frame.sf_sc.sc_eax = regs->tf_rax;
523 frame.sf_sc.sc_eip = regs->tf_rip;
524 frame.sf_sc.sc_cs = regs->tf_cs;
525 frame.sf_sc.sc_eflags = regs->tf_rflags;
526 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
527 frame.sf_sc.sc_ss = regs->tf_ss;
528 frame.sf_sc.sc_err = regs->tf_err;
529 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
530 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
531
532 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
533 frame.sf_extramask[i] = lmask.__bits[i+1];
534
535 if (copyout(&frame, fp, sizeof(frame)) != 0) {
536 /*
537 * Process has trashed its stack; give it an illegal
538 * instruction to halt it in its tracks.
539 */
540 PROC_LOCK(p);
541 sigexit(td, SIGILL);
542 }
543
544 /*
545 * Build context to run handler in.
546 */
547 regs->tf_rsp = PTROUT(fp);
548 regs->tf_rip = linux32_sigcode;
549 regs->tf_rflags &= ~(PSL_T | PSL_D);
550 regs->tf_cs = _ucode32sel;
551 regs->tf_ss = _udatasel;
552 regs->tf_ds = _udatasel;
553 regs->tf_es = _udatasel;
554 regs->tf_fs = _ufssel;
555 regs->tf_gs = _ugssel;
556 regs->tf_flags = TF_HASSEGS;
557 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
558 PROC_LOCK(p);
559 mtx_lock(&psp->ps_mtx);
560}
561
562/*
563 * System call to cleanup state after a signal
564 * has been taken. Reset signal mask and
565 * stack state from context left by sendsig (above).
566 * Return to previous pc and psl as specified by
567 * context left by sendsig. Check carefully to
568 * make sure that the user has not modified the
569 * psl to gain improper privileges or to cause
570 * a machine fault.
571 */
572int
573linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
574{
575 struct l_sigframe frame;
576 struct trapframe *regs;
577 sigset_t bmask;
578 l_sigset_t lmask;
579 int eflags, i;
580 ksiginfo_t ksi;
581
582 regs = td->td_frame;
583
584#ifdef DEBUG
585 if (ldebug(sigreturn))
586 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
587#endif
588 /*
589 * The trampoline code hands us the sigframe.
590 * It is unsafe to keep track of it ourselves, in the event that a
591 * program jumps out of a signal handler.
592 */
593 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
594 return (EFAULT);
595
596 /*
597 * Check for security violations.
598 */
599#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
600 eflags = frame.sf_sc.sc_eflags;
601 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
602 return(EINVAL);
603
604 /*
605 * Don't allow users to load a valid privileged %cs. Let the
606 * hardware check for invalid selectors, excess privilege in
607 * other selectors, invalid %eip's and invalid %esp's.
608 */
609#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
610 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
611 ksiginfo_init_trap(&ksi);
612 ksi.ksi_signo = SIGBUS;
613 ksi.ksi_code = BUS_OBJERR;
614 ksi.ksi_trapno = T_PROTFLT;
615 ksi.ksi_addr = (void *)regs->tf_rip;
616 trapsignal(td, &ksi);
617 return(EINVAL);
618 }
619
620 lmask.__bits[0] = frame.sf_sc.sc_mask;
621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 lmask.__bits[i+1] = frame.sf_extramask[i];
623 linux_to_bsd_sigset(&lmask, &bmask);
624 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
625
626 /*
627 * Restore signal context.
628 */
629 regs->tf_rdi = frame.sf_sc.sc_edi;
630 regs->tf_rsi = frame.sf_sc.sc_esi;
631 regs->tf_rbp = frame.sf_sc.sc_ebp;
632 regs->tf_rbx = frame.sf_sc.sc_ebx;
633 regs->tf_rdx = frame.sf_sc.sc_edx;
634 regs->tf_rcx = frame.sf_sc.sc_ecx;
635 regs->tf_rax = frame.sf_sc.sc_eax;
636 regs->tf_rip = frame.sf_sc.sc_eip;
637 regs->tf_cs = frame.sf_sc.sc_cs;
638 regs->tf_ds = frame.sf_sc.sc_ds;
639 regs->tf_es = frame.sf_sc.sc_es;
640 regs->tf_fs = frame.sf_sc.sc_fs;
641 regs->tf_gs = frame.sf_sc.sc_gs;
642 regs->tf_rflags = eflags;
643 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
644 regs->tf_ss = frame.sf_sc.sc_ss;
645 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
646
647 return (EJUSTRETURN);
648}
649
650/*
651 * System call to cleanup state after a signal
652 * has been taken. Reset signal mask and
653 * stack state from context left by rt_sendsig (above).
654 * Return to previous pc and psl as specified by
655 * context left by sendsig. Check carefully to
656 * make sure that the user has not modified the
657 * psl to gain improper privileges or to cause
658 * a machine fault.
659 */
660int
661linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
662{
663 struct l_ucontext uc;
664 struct l_sigcontext *context;
665 sigset_t bmask;
666 l_stack_t *lss;
667 stack_t ss;
668 struct trapframe *regs;
669 int eflags;
670 ksiginfo_t ksi;
671
672 regs = td->td_frame;
673
674#ifdef DEBUG
675 if (ldebug(rt_sigreturn))
676 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
677#endif
678 /*
679 * The trampoline code hands us the ucontext.
680 * It is unsafe to keep track of it ourselves, in the event that a
681 * program jumps out of a signal handler.
682 */
683 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
684 return (EFAULT);
685
686 context = &uc.uc_mcontext;
687
688 /*
689 * Check for security violations.
690 */
691#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
692 eflags = context->sc_eflags;
693 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
694 return(EINVAL);
695
696 /*
697 * Don't allow users to load a valid privileged %cs. Let the
698 * hardware check for invalid selectors, excess privilege in
699 * other selectors, invalid %eip's and invalid %esp's.
700 */
701#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
702 if (!CS_SECURE(context->sc_cs)) {
703 ksiginfo_init_trap(&ksi);
704 ksi.ksi_signo = SIGBUS;
705 ksi.ksi_code = BUS_OBJERR;
706 ksi.ksi_trapno = T_PROTFLT;
707 ksi.ksi_addr = (void *)regs->tf_rip;
708 trapsignal(td, &ksi);
709 return(EINVAL);
710 }
711
712 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
713 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
714
715 /*
716 * Restore signal context
717 */
718 regs->tf_gs = context->sc_gs;
719 regs->tf_fs = context->sc_fs;
720 regs->tf_es = context->sc_es;
721 regs->tf_ds = context->sc_ds;
722 regs->tf_rdi = context->sc_edi;
723 regs->tf_rsi = context->sc_esi;
724 regs->tf_rbp = context->sc_ebp;
725 regs->tf_rbx = context->sc_ebx;
726 regs->tf_rdx = context->sc_edx;
727 regs->tf_rcx = context->sc_ecx;
728 regs->tf_rax = context->sc_eax;
729 regs->tf_rip = context->sc_eip;
730 regs->tf_cs = context->sc_cs;
731 regs->tf_rflags = eflags;
732 regs->tf_rsp = context->sc_esp_at_signal;
733 regs->tf_ss = context->sc_ss;
734 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
735
736 /*
737 * call sigaltstack & ignore results..
738 */
739 lss = &uc.uc_stack;
740 ss.ss_sp = PTRIN(lss->ss_sp);
741 ss.ss_size = lss->ss_size;
742 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
743
744#ifdef DEBUG
745 if (ldebug(rt_sigreturn))
746 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
747 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
748#endif
749 (void)kern_sigaltstack(td, &ss, NULL);
750
751 return (EJUSTRETURN);
752}
753
754static int
755linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
756{
757 struct proc *p;
758 struct trapframe *frame;
759
760 p = td->td_proc;
761 frame = td->td_frame;
762
763 sa->args[0] = frame->tf_rbx;
764 sa->args[1] = frame->tf_rcx;
765 sa->args[2] = frame->tf_rdx;
766 sa->args[3] = frame->tf_rsi;
767 sa->args[4] = frame->tf_rdi;
768 sa->args[5] = frame->tf_rbp; /* Unconfirmed */
769 sa->code = frame->tf_rax;
770
771 if (sa->code >= p->p_sysent->sv_size)
772 sa->callp = &p->p_sysent->sv_table[0];
773 else
774 sa->callp = &p->p_sysent->sv_table[sa->code];
775 sa->narg = sa->callp->sy_narg;
776
777 td->td_retval[0] = 0;
778 td->td_retval[1] = frame->tf_rdx;
779
780 return (0);
781}
782
783/*
784 * If a linux binary is exec'ing something, try this image activator
785 * first. We override standard shell script execution in order to
786 * be able to modify the interpreter path. We only do this if a linux
787 * binary is doing the exec, so we do not create an EXEC module for it.
788 */
789static int exec_linux_imgact_try(struct image_params *iparams);
790
791static int
792exec_linux_imgact_try(struct image_params *imgp)
793{
794 const char *head = (const char *)imgp->image_header;
795 char *rpath;
796 int error = -1;
797
798 /*
799 * The interpreter for shell scripts run from a linux binary needs
800 * to be located in /compat/linux if possible in order to recursively
801 * maintain linux path emulation.
802 */
803 if (((const short *)head)[0] == SHELLMAGIC) {
804 /*
805 * Run our normal shell image activator. If it succeeds attempt
806 * to use the alternate path for the interpreter. If an
807 * alternate * path is found, use our stringspace to store it.
808 */
809 if ((error = exec_shell_imgact(imgp)) == 0) {
810 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
811 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
812 AT_FDCWD);
813 if (rpath != NULL)
814 imgp->args->fname_buf =
815 imgp->interpreter_name = rpath;
816 }
817 }
818 return (error);
819}
820
821/*
822 * Clear registers on exec
823 * XXX copied from ia32_signal.c.
824 */
825static void
826exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
827{
828 struct trapframe *regs = td->td_frame;
829 struct pcb *pcb = td->td_pcb;
830
831 mtx_lock(&dt_lock);
832 if (td->td_proc->p_md.md_ldt != NULL)
833 user_ldt_free(td);
834 else
835 mtx_unlock(&dt_lock);
836
837 critical_enter();
838 wrmsr(MSR_FSBASE, 0);
839 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
840 pcb->pcb_fsbase = 0;
841 pcb->pcb_gsbase = 0;
842 critical_exit();
843 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
844
845 bzero((char *)regs, sizeof(struct trapframe));
846 regs->tf_rip = imgp->entry_addr;
847 regs->tf_rsp = stack;
848 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
849 regs->tf_gs = _ugssel;
850 regs->tf_fs = _ufssel;
851 regs->tf_es = _udatasel;
852 regs->tf_ds = _udatasel;
853 regs->tf_ss = _udatasel;
854 regs->tf_flags = TF_HASSEGS;
855 regs->tf_cs = _ucode32sel;
856 regs->tf_rbx = imgp->ps_strings;
857
858 fpstate_drop(td);
859
860 /* Do full restore on return so that we can change to a different %cs */
861 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
862 td->td_retval[1] = 0;
863}
864
865/*
866 * XXX copied from ia32_sysvec.c.
867 */
868static register_t *
869linux_copyout_strings(struct image_params *imgp)
870{
871 int argc, envc;
872 u_int32_t *vectp;
873 char *stringp, *destp;
874 u_int32_t *stack_base;
875 struct linux32_ps_strings *arginfo;
876
877 /*
878 * Calculate string base and vector table pointers.
879 */
880 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
881 destp = (caddr_t)arginfo - SPARE_USRSPACE -
882 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
883
884 /*
885 * If we have a valid auxargs ptr, prepare some room
886 * on the stack.
887 */
888 if (imgp->auxargs) {
889 /*
890 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
891 * lower compatibility.
892 */
893 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
894 (LINUX_AT_COUNT * 2);
895 /*
896 * The '+ 2' is for the null pointers at the end of each of
897 * the arg and env vector sets,and imgp->auxarg_size is room
898 * for argument of Runtime loader.
899 */
900 vectp = (u_int32_t *) (destp - (imgp->args->argc +
901 imgp->args->envc + 2 + imgp->auxarg_size) *
902 sizeof(u_int32_t));
903
904 } else
905 /*
906 * The '+ 2' is for the null pointers at the end of each of
907 * the arg and env vector sets
908 */
909 vectp = (u_int32_t *)(destp - (imgp->args->argc +
910 imgp->args->envc + 2) * sizeof(u_int32_t));
911
912 /*
913 * vectp also becomes our initial stack base
914 */
915 stack_base = vectp;
916
917 stringp = imgp->args->begin_argv;
918 argc = imgp->args->argc;
919 envc = imgp->args->envc;
920 /*
921 * Copy out strings - arguments and environment.
922 */
923 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
924
925 /*
926 * Fill in "ps_strings" struct for ps, w, etc.
927 */
928 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
929 suword32(&arginfo->ps_nargvstr, argc);
930
931 /*
932 * Fill in argument portion of vector table.
933 */
934 for (; argc > 0; --argc) {
935 suword32(vectp++, (uint32_t)(intptr_t)destp);
936 while (*stringp++ != 0)
937 destp++;
938 destp++;
939 }
940
941 /* a null vector table pointer separates the argp's from the envp's */
942 suword32(vectp++, 0);
943
944 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
945 suword32(&arginfo->ps_nenvstr, envc);
946
947 /*
948 * Fill in environment portion of vector table.
949 */
950 for (; envc > 0; --envc) {
951 suword32(vectp++, (uint32_t)(intptr_t)destp);
952 while (*stringp++ != 0)
953 destp++;
954 destp++;
955 }
956
957 /* end of vector table is a null pointer */
958 suword32(vectp, 0);
959
960 return ((register_t *)stack_base);
961}
962
963static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
964 "32-bit Linux emulation");
965
966static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
967SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
968 &linux32_maxdsiz, 0, "");
969static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
970SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
971 &linux32_maxssiz, 0, "");
972static u_long linux32_maxvmem = LINUX32_MAXVMEM;
973SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
974 &linux32_maxvmem, 0, "");
975
976#if defined(DEBUG)
977SYSCTL_PROC(_compat_linux32, OID_AUTO, debug,
978 CTLTYPE_STRING | CTLFLAG_RW,
979 0, 0, linux_sysctl_debug, "A",
980 "Linux debugging control");
981#endif
982
979static void
980linux32_fixlimit(struct rlimit *rl, int which)
981{
982
983 switch (which) {
984 case RLIMIT_DATA:
985 if (linux32_maxdsiz != 0) {
986 if (rl->rlim_cur > linux32_maxdsiz)
987 rl->rlim_cur = linux32_maxdsiz;
988 if (rl->rlim_max > linux32_maxdsiz)
989 rl->rlim_max = linux32_maxdsiz;
990 }
991 break;
992 case RLIMIT_STACK:
993 if (linux32_maxssiz != 0) {
994 if (rl->rlim_cur > linux32_maxssiz)
995 rl->rlim_cur = linux32_maxssiz;
996 if (rl->rlim_max > linux32_maxssiz)
997 rl->rlim_max = linux32_maxssiz;
998 }
999 break;
1000 case RLIMIT_VMEM:
1001 if (linux32_maxvmem != 0) {
1002 if (rl->rlim_cur > linux32_maxvmem)
1003 rl->rlim_cur = linux32_maxvmem;
1004 if (rl->rlim_max > linux32_maxvmem)
1005 rl->rlim_max = linux32_maxvmem;
1006 }
1007 break;
1008 }
1009}
1010
1011struct sysentvec elf_linux_sysvec = {
1012 .sv_size = LINUX_SYS_MAXSYSCALL,
1013 .sv_table = linux_sysent,
1014 .sv_mask = 0,
1015 .sv_sigsize = LINUX_SIGTBLSZ,
1016 .sv_sigtbl = bsd_to_linux_signal,
1017 .sv_errsize = ELAST + 1,
1018 .sv_errtbl = bsd_to_linux_errno,
1019 .sv_transtrap = translate_traps,
1020 .sv_fixup = elf_linux_fixup,
1021 .sv_sendsig = linux_sendsig,
1022 .sv_sigcode = &_binary_linux32_locore_o_start,
1023 .sv_szsigcode = &linux_szsigcode,
1024 .sv_prepsyscall = NULL,
1025 .sv_name = "Linux ELF32",
1026 .sv_coredump = elf32_coredump,
1027 .sv_imgact_try = exec_linux_imgact_try,
1028 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1029 .sv_pagesize = PAGE_SIZE,
1030 .sv_minuser = VM_MIN_ADDRESS,
1031 .sv_maxuser = LINUX32_MAXUSER,
1032 .sv_usrstack = LINUX32_USRSTACK,
1033 .sv_psstrings = LINUX32_PS_STRINGS,
1034 .sv_stackprot = VM_PROT_ALL,
1035 .sv_copyout_strings = linux_copyout_strings,
1036 .sv_setregs = exec_linux_setregs,
1037 .sv_fixlimit = linux32_fixlimit,
1038 .sv_maxssiz = &linux32_maxssiz,
1039 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1040 .sv_set_syscall_retval = cpu_set_syscall_retval,
1041 .sv_fetch_syscall_args = linux32_fetch_syscall_args,
1042 .sv_syscallnames = NULL,
1043 .sv_shared_page_base = LINUX32_SHAREDPAGE,
1044 .sv_shared_page_len = PAGE_SIZE,
1045 .sv_schedtail = linux_schedtail,
1046 .sv_thread_detach = linux_thread_detach,
1047};
1048
1049static void
1050linux_vdso_install(void *param)
1051{
1052
1053 linux_szsigcode = (&_binary_linux32_locore_o_end -
1054 &_binary_linux32_locore_o_start);
1055
1056 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1057 panic("Linux invalid vdso size\n");
1058
1059 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1060
1061 linux_shared_page_obj = __elfN(linux_shared_page_init)
1062 (&linux_shared_page_mapping);
1063
1064 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);
1065
1066 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1067 linux_szsigcode);
1068 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1069
1070 linux_kplatform = linux_shared_page_mapping +
1071 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
1072}
1073SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1074 (sysinit_cfunc_t)linux_vdso_install, NULL);
1075
1076static void
1077linux_vdso_deinstall(void *param)
1078{
1079
1080 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1081};
1082SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1083 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1084
1085static char GNU_ABI_VENDOR[] = "GNU";
1086static int GNULINUX_ABI_DESC = 0;
1087
1088static boolean_t
1089linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1090{
1091 const Elf32_Word *desc;
1092 uintptr_t p;
1093
1094 p = (uintptr_t)(note + 1);
1095 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1096
1097 desc = (const Elf32_Word *)p;
1098 if (desc[0] != GNULINUX_ABI_DESC)
1099 return (FALSE);
1100
1101 /*
1102 * For linux we encode osrel as follows (see linux_mib.c):
1103 * VVVMMMIII (version, major, minor), see linux_mib.c.
1104 */
1105 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1106
1107 return (TRUE);
1108}
1109
1110static Elf_Brandnote linux32_brandnote = {
1111 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1112 .hdr.n_descsz = 16, /* XXX at least 16 */
1113 .hdr.n_type = 1,
1114 .vendor = GNU_ABI_VENDOR,
1115 .flags = BN_TRANSLATE_OSREL,
1116 .trans_osrel = linux32_trans_osrel
1117};
1118
1119static Elf32_Brandinfo linux_brand = {
1120 .brand = ELFOSABI_LINUX,
1121 .machine = EM_386,
1122 .compat_3_brand = "Linux",
1123 .emul_path = "/compat/linux",
1124 .interp_path = "/lib/ld-linux.so.1",
1125 .sysvec = &elf_linux_sysvec,
1126 .interp_newpath = NULL,
1127 .brand_note = &linux32_brandnote,
1128 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1129};
1130
1131static Elf32_Brandinfo linux_glibc2brand = {
1132 .brand = ELFOSABI_LINUX,
1133 .machine = EM_386,
1134 .compat_3_brand = "Linux",
1135 .emul_path = "/compat/linux",
1136 .interp_path = "/lib/ld-linux.so.2",
1137 .sysvec = &elf_linux_sysvec,
1138 .interp_newpath = NULL,
1139 .brand_note = &linux32_brandnote,
1140 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1141};
1142
1143Elf32_Brandinfo *linux_brandlist[] = {
1144 &linux_brand,
1145 &linux_glibc2brand,
1146 NULL
1147};
1148
1149static int
1150linux_elf_modevent(module_t mod, int type, void *data)
1151{
1152 Elf32_Brandinfo **brandinfo;
1153 int error;
1154 struct linux_ioctl_handler **lihp;
983static void
984linux32_fixlimit(struct rlimit *rl, int which)
985{
986
987 switch (which) {
988 case RLIMIT_DATA:
989 if (linux32_maxdsiz != 0) {
990 if (rl->rlim_cur > linux32_maxdsiz)
991 rl->rlim_cur = linux32_maxdsiz;
992 if (rl->rlim_max > linux32_maxdsiz)
993 rl->rlim_max = linux32_maxdsiz;
994 }
995 break;
996 case RLIMIT_STACK:
997 if (linux32_maxssiz != 0) {
998 if (rl->rlim_cur > linux32_maxssiz)
999 rl->rlim_cur = linux32_maxssiz;
1000 if (rl->rlim_max > linux32_maxssiz)
1001 rl->rlim_max = linux32_maxssiz;
1002 }
1003 break;
1004 case RLIMIT_VMEM:
1005 if (linux32_maxvmem != 0) {
1006 if (rl->rlim_cur > linux32_maxvmem)
1007 rl->rlim_cur = linux32_maxvmem;
1008 if (rl->rlim_max > linux32_maxvmem)
1009 rl->rlim_max = linux32_maxvmem;
1010 }
1011 break;
1012 }
1013}
1014
1015struct sysentvec elf_linux_sysvec = {
1016 .sv_size = LINUX_SYS_MAXSYSCALL,
1017 .sv_table = linux_sysent,
1018 .sv_mask = 0,
1019 .sv_sigsize = LINUX_SIGTBLSZ,
1020 .sv_sigtbl = bsd_to_linux_signal,
1021 .sv_errsize = ELAST + 1,
1022 .sv_errtbl = bsd_to_linux_errno,
1023 .sv_transtrap = translate_traps,
1024 .sv_fixup = elf_linux_fixup,
1025 .sv_sendsig = linux_sendsig,
1026 .sv_sigcode = &_binary_linux32_locore_o_start,
1027 .sv_szsigcode = &linux_szsigcode,
1028 .sv_prepsyscall = NULL,
1029 .sv_name = "Linux ELF32",
1030 .sv_coredump = elf32_coredump,
1031 .sv_imgact_try = exec_linux_imgact_try,
1032 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1033 .sv_pagesize = PAGE_SIZE,
1034 .sv_minuser = VM_MIN_ADDRESS,
1035 .sv_maxuser = LINUX32_MAXUSER,
1036 .sv_usrstack = LINUX32_USRSTACK,
1037 .sv_psstrings = LINUX32_PS_STRINGS,
1038 .sv_stackprot = VM_PROT_ALL,
1039 .sv_copyout_strings = linux_copyout_strings,
1040 .sv_setregs = exec_linux_setregs,
1041 .sv_fixlimit = linux32_fixlimit,
1042 .sv_maxssiz = &linux32_maxssiz,
1043 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1044 .sv_set_syscall_retval = cpu_set_syscall_retval,
1045 .sv_fetch_syscall_args = linux32_fetch_syscall_args,
1046 .sv_syscallnames = NULL,
1047 .sv_shared_page_base = LINUX32_SHAREDPAGE,
1048 .sv_shared_page_len = PAGE_SIZE,
1049 .sv_schedtail = linux_schedtail,
1050 .sv_thread_detach = linux_thread_detach,
1051};
1052
1053static void
1054linux_vdso_install(void *param)
1055{
1056
1057 linux_szsigcode = (&_binary_linux32_locore_o_end -
1058 &_binary_linux32_locore_o_start);
1059
1060 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1061 panic("Linux invalid vdso size\n");
1062
1063 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1064
1065 linux_shared_page_obj = __elfN(linux_shared_page_init)
1066 (&linux_shared_page_mapping);
1067
1068 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);
1069
1070 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1071 linux_szsigcode);
1072 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1073
1074 linux_kplatform = linux_shared_page_mapping +
1075 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
1076}
1077SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1078 (sysinit_cfunc_t)linux_vdso_install, NULL);
1079
1080static void
1081linux_vdso_deinstall(void *param)
1082{
1083
1084 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1085};
1086SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1087 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1088
1089static char GNU_ABI_VENDOR[] = "GNU";
1090static int GNULINUX_ABI_DESC = 0;
1091
1092static boolean_t
1093linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1094{
1095 const Elf32_Word *desc;
1096 uintptr_t p;
1097
1098 p = (uintptr_t)(note + 1);
1099 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1100
1101 desc = (const Elf32_Word *)p;
1102 if (desc[0] != GNULINUX_ABI_DESC)
1103 return (FALSE);
1104
1105 /*
1106 * For linux we encode osrel as follows (see linux_mib.c):
1107 * VVVMMMIII (version, major, minor), see linux_mib.c.
1108 */
1109 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1110
1111 return (TRUE);
1112}
1113
1114static Elf_Brandnote linux32_brandnote = {
1115 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1116 .hdr.n_descsz = 16, /* XXX at least 16 */
1117 .hdr.n_type = 1,
1118 .vendor = GNU_ABI_VENDOR,
1119 .flags = BN_TRANSLATE_OSREL,
1120 .trans_osrel = linux32_trans_osrel
1121};
1122
1123static Elf32_Brandinfo linux_brand = {
1124 .brand = ELFOSABI_LINUX,
1125 .machine = EM_386,
1126 .compat_3_brand = "Linux",
1127 .emul_path = "/compat/linux",
1128 .interp_path = "/lib/ld-linux.so.1",
1129 .sysvec = &elf_linux_sysvec,
1130 .interp_newpath = NULL,
1131 .brand_note = &linux32_brandnote,
1132 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1133};
1134
1135static Elf32_Brandinfo linux_glibc2brand = {
1136 .brand = ELFOSABI_LINUX,
1137 .machine = EM_386,
1138 .compat_3_brand = "Linux",
1139 .emul_path = "/compat/linux",
1140 .interp_path = "/lib/ld-linux.so.2",
1141 .sysvec = &elf_linux_sysvec,
1142 .interp_newpath = NULL,
1143 .brand_note = &linux32_brandnote,
1144 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1145};
1146
1147Elf32_Brandinfo *linux_brandlist[] = {
1148 &linux_brand,
1149 &linux_glibc2brand,
1150 NULL
1151};
1152
/*
 * Module event handler for the "linuxelf" module.
 *
 * mod, data: not referenced by the body.
 * type:      MOD_LOAD or MOD_UNLOAD; any other value yields EOPNOTSUPP.
 *
 * MOD_LOAD inserts every entry of linux_brandlist with
 * elf32_insert_brand_entry(); a negative return sets error to EINVAL.
 * When no insertion failed it registers the handlers found in
 * linux_ioctl_handler_set, initializes futex_list and futex_mtx,
 * registers the process_exit, process_exec and thread_dtor event
 * handlers and sets stclohz to stathz, or to hz when stathz is zero.
 *
 * MOD_UNLOAD reports EBUSY when elf32_brand_inuse() is true for any
 * brand; otherwise it removes each brand (EINVAL on a negative return)
 * and, if that succeeded, undoes the registrations made at load time.
 *
 * Returns 0 on success or the errno value collected above.
 *
 * NOTE(review): on MOD_LOAD, brands inserted before a failing
 * elf32_insert_brand_entry() call are not removed on the failure path.
 *
 * NOTE(review): this span carries the lines of two revisions of the
 * function interleaved (see the embedded line numbers).  Only one of
 * them declares ldhp, walks linux_device_handler_set and calls
 * linux_osd_jail_register()/linux_osd_jail_deregister().
 */
1153static int
1154linux_elf_modevent(module_t mod, int type, void *data)
1155{
1156 Elf32_Brandinfo **brandinfo;
1157 int error;
1158 struct linux_ioctl_handler **lihp;
1155 struct linux_device_handler **ldhp;
1156
1157 error = 0;
1158
1159 switch(type) {
1160 case MOD_LOAD:
1161 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1162 ++brandinfo)
1163 if (elf32_insert_brand_entry(*brandinfo) < 0)
1164 error = EINVAL;
1165 if (error == 0) {
1166 SET_FOREACH(lihp, linux_ioctl_handler_set)
1167 linux_ioctl_register_handler(*lihp);
1159
1160 error = 0;
1161
1162 switch(type) {
1163 case MOD_LOAD:
1164 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1165 ++brandinfo)
1166 if (elf32_insert_brand_entry(*brandinfo) < 0)
1167 error = EINVAL;
1168 if (error == 0) {
1169 SET_FOREACH(lihp, linux_ioctl_handler_set)
1170 linux_ioctl_register_handler(*lihp);
1168 SET_FOREACH(ldhp, linux_device_handler_set)
1169 linux_device_register_handler(*ldhp);
1170 LIST_INIT(&futex_list);
1171 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1172 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1173 linux_proc_exit, NULL, 1000);
1174 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1175 linux_proc_exec, NULL, 1000);
1176 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1177 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1171 LIST_INIT(&futex_list);
1172 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1173 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1174 linux_proc_exit, NULL, 1000);
1175 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1176 linux_proc_exec, NULL, 1000);
1177 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1178 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1178 linux_osd_jail_register();
1179 stclohz = (stathz ? stathz : hz);
1180 if (bootverbose)
1181 printf("Linux ELF exec handler installed\n");
1182 } else
1183 printf("cannot insert Linux ELF brand handler\n");
1184 break;
1185 case MOD_UNLOAD:
1186 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1187 ++brandinfo)
1188 if (elf32_brand_inuse(*brandinfo))
1189 error = EBUSY;
1190 if (error == 0) {
1191 for (brandinfo = &linux_brandlist[0];
1192 *brandinfo != NULL; ++brandinfo)
1193 if (elf32_remove_brand_entry(*brandinfo) < 0)
1194 error = EINVAL;
1195 }
1196 if (error == 0) {
1197 SET_FOREACH(lihp, linux_ioctl_handler_set)
1198 linux_ioctl_unregister_handler(*lihp);
1179 stclohz = (stathz ? stathz : hz);
1180 if (bootverbose)
1181 printf("Linux ELF exec handler installed\n");
1182 } else
1183 printf("cannot insert Linux ELF brand handler\n");
1184 break;
1185 case MOD_UNLOAD:
1186 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1187 ++brandinfo)
1188 if (elf32_brand_inuse(*brandinfo))
1189 error = EBUSY;
1190 if (error == 0) {
1191 for (brandinfo = &linux_brandlist[0];
1192 *brandinfo != NULL; ++brandinfo)
1193 if (elf32_remove_brand_entry(*brandinfo) < 0)
1194 error = EINVAL;
1195 }
1196 if (error == 0) {
1197 SET_FOREACH(lihp, linux_ioctl_handler_set)
1198 linux_ioctl_unregister_handler(*lihp);
1199 SET_FOREACH(ldhp, linux_device_handler_set)
1200 linux_device_unregister_handler(*ldhp);
1201 mtx_destroy(&futex_mtx);
1202 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1203 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1204 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1199 mtx_destroy(&futex_mtx);
1200 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1201 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1202 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1205 linux_osd_jail_deregister();
1206 if (bootverbose)
1207 printf("Linux ELF exec handler removed\n");
1208 } else
1209 printf("Could not deinstall ELF interpreter entry\n");
1210 break;
1211 default:
1212 return (EOPNOTSUPP);
1213 }
1214 return (error);
1215}
1216
1217static moduledata_t linux_elf_mod = {
1218 "linuxelf",
1219 linux_elf_modevent,
1220 0
1221};
1222
1223DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1203 if (bootverbose)
1204 printf("Linux ELF exec handler removed\n");
1205 } else
1206 printf("Could not deinstall ELF interpreter entry\n");
1207 break;
1208 default:
1209 return (EOPNOTSUPP);
1210 }
1211 return (error);
1212}
1213
1214static moduledata_t linux_elf_mod = {
1215 "linuxelf",
1216 linux_elf_modevent,
1217 0
1218};
1219
1220DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1221MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1);