Deleted Added
full compact
linux32_sysvec.c (293540) linux32_sysvec.c (293569)
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2004 Tim J. Robbins
3 * Copyright (c) 2003 Peter Wemm
4 * Copyright (c) 2002 Doug Rabson
5 * Copyright (c) 1998-1999 Andrew Gallatin
6 * Copyright (c) 1994-1996 Søren Schmidt
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer
14 * in this position and unchanged.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293540 2016-01-09 16:29:51Z dchagin $");
34__FBSDID("$FreeBSD: stable/10/sys/amd64/linux32/linux32_sysvec.c 293569 2016-01-09 17:18:03Z dchagin $");
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define __ELF_WORD_SIZE 32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86#include <compat/linux/linux_vdso.h>
87
88MODULE_VERSION(linux, 1);
89
/*
 * Emit one auxiliary-vector pair through suword32(): first `id`, then
 * `val`, each stored at `pos`, which is post-incremented after every
 * store.  `pos` is expanded twice, so it must be a plain pointer lvalue.
 * The return values of suword32() are discarded.
 */
90#define AUXARGS_ENTRY_32(pos, id, val) \
91 do { \
92 suword32(pos++, id); \
93 suword32(pos++, val); \
94 } while (0)
95
/*
 * The two characters '#' (0x23) and '!' (0x21) expressed as one 16-bit
 * constant, with the byte order chosen to match the host.
 */
96#if BYTE_ORDER == LITTLE_ENDIAN
97#define SHELLMAGIC 0x2123 /* #! */
98#else
99#define SHELLMAGIC 0x2321
100#endif
101
102/*
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
106 * ldebug(sigreturn).
 *
 * Both names below therefore expand to the constant 0; they are
 * consumed by the ldebug(rt_sendsig) / ldebug(sendsig) calls in the
 * DEBUG-only sections of linux_rt_sendsig() and linux_sendsig().
107 */
108#define LINUX_SYS_linux_rt_sendsig 0
109#define LINUX_SYS_linux_sendsig 0
110
/* File-scope state; none of these objects is initialized in this part of the file. */
111const char *linux_kplatform;
112static int linux_szsigcode;
113static vm_object_t linux_shared_page_obj;
114static char *linux_shared_page_mapping;
/* NOTE(review): presumably linker-provided start/end markers of the embedded linux32 locore object - confirm. */
115extern char _binary_linux32_locore_o_start;
116extern char _binary_linux32_locore_o_end;
117
/* struct sysent array with LINUX_SYS_MAXSYSCALL entries, defined in another file. */
118extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
119
120SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
121
/* Forward declarations of functions that are static to this file. */
122static int elf_linux_fixup(register_t **stack_base,
123 struct image_params *iparams);
124static register_t *linux_copyout_strings(struct image_params *imgp);
125static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126static void exec_linux_setregs(struct thread *td,
127 struct image_params *imgp, u_long stack);
128static void linux32_fixlimit(struct rlimit *rl, int which);
129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130static void linux_vdso_install(void *param);
131static void linux_vdso_deinstall(void *param);
132
133/*
134 * Linux syscalls return negative errno's, we do positive and map them
135 * Reference:
136 * FreeBSD: src/sys/sys/errno.h
137 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
138 * linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is positional: the array index is the FreeBSD errno and the
 * stored value is the already-negated Linux errno.  Ten values per row,
 * so row k starts at FreeBSD errno 10 * k.
 *
 * NOTE(review): only 93 initializers (errnos 0..92) are listed while the
 * array is sized ELAST + 1.  If ELAST is greater than 92, the remaining
 * slots are implicitly zero and those FreeBSD errnos would translate to
 * 0 - confirm against sys/errno.h and extend the table if needed.
139 */
140static int bsd_to_linux_errno[ELAST + 1] = {
141 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
142 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
143 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
144 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
145 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
146 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
147 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
148 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
149 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
150 -72, -67, -71
151};
152
/*
 * Positional FreeBSD -> Linux signal translation table with 31
 * initializers, starting at LINUX_SIGHUP.  Two slots hold 0, i.e. no
 * Linux signal is listed for that position.
 * NOTE(review): presumably indexed by (FreeBSD signal number - 1) via
 * BSD_TO_LINUX_SIGNAL() - confirm against the macro's definition.
 */
153int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
154 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
155 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
156 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
157 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
158 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
159 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
160 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
161 0, LINUX_SIGUSR1, LINUX_SIGUSR2
162};
163
/*
 * Positional Linux -> FreeBSD signal translation table with 31
 * initializers, starting at SIGHUP.  SIGBUS and SIGURG each appear
 * twice, so two distinct positions collapse onto the same FreeBSD
 * signal.
 * NOTE(review): presumably indexed by (Linux signal number - 1) -
 * confirm against the lookup macro that consumes this table.
 */
164int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
165 SIGHUP, SIGINT, SIGQUIT, SIGILL,
166 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
167 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
168 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
169 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
170 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
171 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
172 SIGIO, SIGURG, SIGSYS
173};
174
/*
 * Positional map from a FreeBSD trap code (the array index, named in the
 * per-line comments) to the value stored in the Linux sigcontext's
 * sc_trapno field.  Indices without a listed equivalent hold
 * LINUX_T_UNKNOWN.
 */
175#define LINUX_T_UNKNOWN 255
176static int _bsd_to_linux_trapcode[] = {
177 LINUX_T_UNKNOWN, /* 0 */
178 6, /* 1 T_PRIVINFLT */
179 LINUX_T_UNKNOWN, /* 2 */
180 3, /* 3 T_BPTFLT */
181 LINUX_T_UNKNOWN, /* 4 */
182 LINUX_T_UNKNOWN, /* 5 */
183 16, /* 6 T_ARITHTRAP */
184 254, /* 7 T_ASTFLT */
185 LINUX_T_UNKNOWN, /* 8 */
186 13, /* 9 T_PROTFLT */
187 1, /* 10 T_TRCTRAP */
188 LINUX_T_UNKNOWN, /* 11 */
189 14, /* 12 T_PAGEFLT */
190 LINUX_T_UNKNOWN, /* 13 */
191 17, /* 14 T_ALIGNFLT */
192 LINUX_T_UNKNOWN, /* 15 */
193 LINUX_T_UNKNOWN, /* 16 */
194 LINUX_T_UNKNOWN, /* 17 */
195 0, /* 18 T_DIVIDE */
196 2, /* 19 T_NMI */
197 4, /* 20 T_OFLOW */
198 5, /* 21 T_BOUND */
199 7, /* 22 T_DNA */
200 8, /* 23 T_DOUBLEFLT */
201 9, /* 24 T_FPOPFLT */
202 10, /* 25 T_TSSFLT */
203 11, /* 26 T_SEGNPFLT */
204 12, /* 27 T_STKFLT */
205 18, /* 28 T_MCHK */
206 19, /* 29 T_XMMFLT */
207 15 /* 30 T_RESERVED */
208};
/*
 * Bounds-checked lookup in the table above: yields the table entry when
 * `code` is below the element count and LINUX_T_UNKNOWN otherwise.
 * `code` is evaluated twice.  Because the bound is a sizeof expression,
 * an int argument is compared as unsigned, so negative codes also fall
 * through to LINUX_T_UNKNOWN.
 */
209#define bsd_to_linux_trapcode(code) \
210 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
211 _bsd_to_linux_trapcode[(code)]: \
212 LINUX_T_UNKNOWN)
213
/*
 * Bookkeeping for a process's argument and environment strings in a
 * 32-bit layout: the two "first string" fields are u_int32_t values
 * rather than native pointers.
 */
214struct linux32_ps_strings {
215 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
216 u_int ps_nargvstr; /* the number of argument strings */
217 u_int32_t ps_envstr; /* first of 0 or more environment strings */
218 u_int ps_nenvstr; /* the number of environment strings */
219};
220
/*
 * Symbols declared through the LINUX_VDSO_SYM_* macros.  In this file
 * linux32_sigcode and linux32_rt_sigcode are loaded into tf_rip when a
 * signal handler is entered, linux32_vsyscall is exported as
 * LINUX_AT_SYSINFO and linux_platform as LINUX_AT_PLATFORM.
 * NOTE(review): presumably resolved to addresses inside the shared
 * page when the vDSO is installed - confirm in linux_vdso.h.
 */
221LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
222LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
223LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
224LINUX_VDSO_SYM_CHAR(linux_platform);
225
226/*
227 * If FreeBSD & Linux have a difference of opinion about what a trap
228 * means, deal with it here.
229 *
230 * MPSAFE
231 */
232static int
233translate_traps(int signal, int trap_code)
234{
235 if (signal != SIGBUS)
236 return signal;
237 switch (trap_code) {
238 case T_PROTFLT:
239 case T_TSSFLT:
240 case T_DOUBLEFLT:
241 case T_PAGEFLT:
242 return SIGSEGV;
243 default:
244 return signal;
245 }
246}
247
248static int
249elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
250{
251 Elf32_Auxargs *args;
252 Elf32_Addr *base;
253 Elf32_Addr *pos;
254 struct linux32_ps_strings *arginfo;
255
256 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
257
258 KASSERT(curthread->td_proc == imgp->proc,
259 ("unsafe elf_linux_fixup(), should be curproc"));
260 base = (Elf32_Addr *)*stack_base;
261 args = (Elf32_Auxargs *)imgp->auxargs;
262 pos = base + (imgp->args->argc + imgp->args->envc + 2);
263
264 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
265 imgp->proc->p_sysent->sv_shared_page_base);
266 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
267 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
268
269 /*
270 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
271 * as it has appeared in the 2.4.0-rc7 first time.
272 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
273 * glibc falls back to the hard-coded CLK_TCK value when aux entry
274 * is not present.
275 * Also see linux_times() implementation.
276 */
277 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
278 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
279 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
280 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
281 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
282 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
283 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
284 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
285 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
286 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
287 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
288 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
289 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
290 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
291 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
292 AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary));
293 if (imgp->execpathp != 0)
294 AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp));
295 if (args->execfd != -1)
296 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
297 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
298
299 free(imgp->auxargs, M_TEMP);
300 imgp->auxargs = NULL;
301
302 base--;
303 suword32(base, (uint32_t)imgp->args->argc);
304 *stack_base = (register_t *)base;
305 return (0);
306}
307
/*
 * Deliver a signal to the current thread using the Linux "rt" frame
 * layout (struct l_rt_sigframe).  Called by linux_sendsig() when the
 * handler was installed with SA_SIGINFO.
 *
 * catcher: handler address, stored in the frame's sf_handler.
 * ksi:     signal information; supplies the signal number, the code used
 *          for the trap number, and the address stored in sc_cr2.
 * mask:    signal mask converted into the frame's uc_sigmask.
 *
 * The proc lock and the sigacts mutex are asserted to be held on entry.
 * Both are released while the frame is built and copied out, and both
 * are taken again before a normal return.  If the copyout fails,
 * sigexit(td, SIGILL) is called with the proc lock held.
 */
308static void
309linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
310{
311 struct thread *td = curthread;
312 struct proc *p = td->td_proc;
313 struct sigacts *psp;
314 struct trapframe *regs;
315 struct l_rt_sigframe *fp, frame;
316 int oonstack;
317 int sig;
318 int code;
319
320 sig = ksi->ksi_signo;
321 code = ksi->ksi_code;
322 PROC_LOCK_ASSERT(p, MA_OWNED);
323 psp = p->p_sigacts;
324 mtx_assert(&psp->ps_mtx, MA_OWNED);
325 regs = td->td_frame;
326 oonstack = sigonstack(regs->tf_rsp);
327
328#ifdef DEBUG
329 if (ldebug(rt_sendsig))
330 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
331 catcher, sig, (void*)mask, code);
332#endif
333 /*
334 * Allocate space for the signal handler context.
 * The frame goes at the top of the alternate stack when one is
 * configured, not already in use, and requested for this signal;
 * otherwise it goes directly below the current stack pointer.
335 */
336 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
337 SIGISMEMBER(psp->ps_sigonstack, sig)) {
338 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
339 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
340 } else
341 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
342 mtx_unlock(&psp->ps_mtx);
343
344 /*
345 * Build the argument list for the signal handler.
346 */
 /* From here on `sig` holds the result of BSD_TO_LINUX_SIGNAL(). */
347 sig = BSD_TO_LINUX_SIGNAL(sig);
348
349 bzero(&frame, sizeof(frame));
350
351 frame.sf_handler = PTROUT(catcher);
352 frame.sf_sig = sig;
 /* These two point into the copied-out frame, not the local one. */
353 frame.sf_siginfo = PTROUT(&fp->sf_si);
354 frame.sf_ucontext = PTROUT(&fp->sf_sc);
355
356 /* Fill in POSIX parts */
357 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
358
359 /*
360 * Build the signal context to be used by sigreturn
361 * and libgcc unwind.
362 */
363 frame.sf_sc.uc_flags = 0; /* XXX ??? */
364 frame.sf_sc.uc_link = 0; /* XXX ??? */
365
366 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
367 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
368 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
369 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
370 PROC_UNLOCK(p);
371
372 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
373
 /* Save the interrupted register state from the trapframe. */
374 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
375 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
376 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
377 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
378 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
379 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp;
380 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
381 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
382 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
383 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
384 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
385 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
386 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
387 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
388 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
389 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
390 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
391 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
392 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
393 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
394 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
395
396#ifdef DEBUG
397 if (ldebug(rt_sendsig))
398 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
399 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
400 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
401#endif
402
403 if (copyout(&frame, fp, sizeof(frame)) != 0) {
404 /*
405 * Process has trashed its stack; give it an illegal
406 * instruction to halt it in its tracks.
407 */
408#ifdef DEBUG
409 if (ldebug(rt_sendsig))
410 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
411 fp, oonstack);
412#endif
413 PROC_LOCK(p);
414 sigexit(td, SIGILL);
415 }
416
417 /*
418 * Build context to run handler in.
 * Execution resumes at linux32_rt_sigcode with the stack pointer
 * at the new frame and the 32-bit user selectors loaded.
419 */
420 regs->tf_rsp = PTROUT(fp);
421 regs->tf_rip = linux32_rt_sigcode;
422 regs->tf_rflags &= ~(PSL_T | PSL_D);
423 regs->tf_cs = _ucode32sel;
424 regs->tf_ss = _udatasel;
425 regs->tf_ds = _udatasel;
426 regs->tf_es = _udatasel;
427 regs->tf_fs = _ufssel;
428 regs->tf_gs = _ugssel;
429 regs->tf_flags = TF_HASSEGS;
430 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 /* Re-take the locks that were asserted on entry. */
431 PROC_LOCK(p);
432 mtx_lock(&psp->ps_mtx);
433}
434
435
436/*
437 * Send an interrupt to process.
438 *
439 * Stack is set up to allow sigcode stored
440 * in u. to call routine, followed by kcall
441 * to sigreturn routine below. After sigreturn
442 * resets the signal mask, the stack, and the
443 * frame pointer, it returns to the user
444 * specified pc, psl.
 *
 * catcher: handler address, stored in the frame's sf_handler.
 * ksi:     signal information (signal number, code, fault address).
 * mask:    signal mask saved in sc_mask and sf_extramask.
 *
 * Handlers registered in ps_siginfo are delegated to
 * linux_rt_sendsig(); all others get a struct l_sigframe.  The proc
 * lock and the sigacts mutex are asserted to be held on entry,
 * released while the frame is built and copied out, and taken again
 * before a normal return.  If the copyout fails, sigexit(td, SIGILL)
 * is called with the proc lock held.
445 */
446static void
447linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
448{
449 struct thread *td = curthread;
450 struct proc *p = td->td_proc;
451 struct sigacts *psp;
452 struct trapframe *regs;
453 struct l_sigframe *fp, frame;
454 l_sigset_t lmask;
455 int oonstack, i;
456 int sig, code;
457
458 sig = ksi->ksi_signo;
459 code = ksi->ksi_code;
460 PROC_LOCK_ASSERT(p, MA_OWNED);
461 psp = p->p_sigacts;
462 mtx_assert(&psp->ps_mtx, MA_OWNED);
463 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
464 /* Signal handler installed with SA_SIGINFO. */
465 linux_rt_sendsig(catcher, ksi, mask);
466 return;
467 }
468
469 regs = td->td_frame;
470 oonstack = sigonstack(regs->tf_rsp);
471
472#ifdef DEBUG
473 if (ldebug(sendsig))
474 printf(ARGS(sendsig, "%p, %d, %p, %u"),
475 catcher, sig, (void*)mask, code);
476#endif
477
478 /*
479 * Allocate space for the signal handler context.
 * The frame goes at the top of the alternate stack when one is
 * configured, not already in use, and requested for this signal;
 * otherwise it goes directly below the current stack pointer.
480 */
481 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
482 SIGISMEMBER(psp->ps_sigonstack, sig)) {
483 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
484 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
485 } else
486 fp = (struct l_sigframe *)regs->tf_rsp - 1;
487 mtx_unlock(&psp->ps_mtx);
488 PROC_UNLOCK(p);
489
490 /*
491 * Build the argument list for the signal handler.
492 */
 /* From here on `sig` holds the result of BSD_TO_LINUX_SIGNAL(). */
493 sig = BSD_TO_LINUX_SIGNAL(sig);
494
495 bzero(&frame, sizeof(frame));
496
497 frame.sf_handler = PTROUT(catcher);
498 frame.sf_sig = sig;
499
500 bsd_to_linux_sigset(mask, &lmask);
501
502 /*
503 * Build the signal context to be used by sigreturn.
504 */
505 frame.sf_sc.sc_mask = lmask.__bits[0];
506 frame.sf_sc.sc_gs = regs->tf_gs;
507 frame.sf_sc.sc_fs = regs->tf_fs;
508 frame.sf_sc.sc_es = regs->tf_es;
509 frame.sf_sc.sc_ds = regs->tf_ds;
510 frame.sf_sc.sc_edi = regs->tf_rdi;
511 frame.sf_sc.sc_esi = regs->tf_rsi;
512 frame.sf_sc.sc_ebp = regs->tf_rbp;
513 frame.sf_sc.sc_ebx = regs->tf_rbx;
514 frame.sf_sc.sc_esp = regs->tf_rsp;
515 frame.sf_sc.sc_edx = regs->tf_rdx;
516 frame.sf_sc.sc_ecx = regs->tf_rcx;
517 frame.sf_sc.sc_eax = regs->tf_rax;
518 frame.sf_sc.sc_eip = regs->tf_rip;
519 frame.sf_sc.sc_cs = regs->tf_cs;
520 frame.sf_sc.sc_eflags = regs->tf_rflags;
521 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
522 frame.sf_sc.sc_ss = regs->tf_ss;
523 frame.sf_sc.sc_err = regs->tf_err;
524 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
525 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
526
 /* Mask words beyond the first go into sf_extramask. */
527 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
528 frame.sf_extramask[i] = lmask.__bits[i+1];
529
530 if (copyout(&frame, fp, sizeof(frame)) != 0) {
531 /*
532 * Process has trashed its stack; give it an illegal
533 * instruction to halt it in its tracks.
534 */
535 PROC_LOCK(p);
536 sigexit(td, SIGILL);
537 }
538
539 /*
540 * Build context to run handler in.
 * Execution resumes at linux32_sigcode with the stack pointer at
 * the new frame and the 32-bit user selectors loaded.
541 */
542 regs->tf_rsp = PTROUT(fp);
543 regs->tf_rip = linux32_sigcode;
544 regs->tf_rflags &= ~(PSL_T | PSL_D);
545 regs->tf_cs = _ucode32sel;
546 regs->tf_ss = _udatasel;
547 regs->tf_ds = _udatasel;
548 regs->tf_es = _udatasel;
549 regs->tf_fs = _ufssel;
550 regs->tf_gs = _ugssel;
551 regs->tf_flags = TF_HASSEGS;
552 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 /* Re-take the locks that were asserted on entry. */
553 PROC_LOCK(p);
554 mtx_lock(&psp->ps_mtx);
555}
556
557/*
558 * System call to cleanup state after a signal
559 * has been taken. Reset signal mask and
560 * stack state from context left by sendsig (above).
561 * Return to previous pc and psl as specified by
562 * context left by sendsig. Check carefully to
563 * make sure that the user has not modified the
564 * psl to gain improper privileges or to cause
565 * a machine fault.
 *
 * td:   calling thread; its trapframe is rewritten on success.
 * args: args->sfp is the user address of the struct l_sigframe.
 *
 * Returns EFAULT if the frame cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the checks below (a bad %cs additionally raises
 * SIGBUS via trapsignal()), and EJUSTRETURN once the register state has
 * been restored.
566 */
567int
568linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
569{
570 struct l_sigframe frame;
571 struct trapframe *regs;
572 sigset_t bmask;
573 l_sigset_t lmask;
574 int eflags, i;
575 ksiginfo_t ksi;
576
577 regs = td->td_frame;
578
579#ifdef DEBUG
580 if (ldebug(sigreturn))
581 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
582#endif
583 /*
584 * The trampoline code hands us the sigframe.
585 * It is unsafe to keep track of it ourselves, in the event that a
586 * program jumps out of a signal handler.
587 */
588 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
589 return (EFAULT);
590
591 /*
592 * Check for security violations.
 * Only bits within PSL_USERCHANGE may differ between the saved
 * flags and the flags currently in the trapframe.
593 */
594#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
595 eflags = frame.sf_sc.sc_eflags;
596 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
597 return(EINVAL);
598
599 /*
600 * Don't allow users to load a valid privileged %cs. Let the
601 * hardware check for invalid selectors, excess privilege in
602 * other selectors, invalid %eip's and invalid %esp's.
603 */
604#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
605 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
606 ksiginfo_init_trap(&ksi);
607 ksi.ksi_signo = SIGBUS;
608 ksi.ksi_code = BUS_OBJERR;
609 ksi.ksi_trapno = T_PROTFLT;
610 ksi.ksi_addr = (void *)regs->tf_rip;
611 trapsignal(td, &ksi);
612 return(EINVAL);
613 }
614
 /* Reassemble the Linux mask from sc_mask plus sf_extramask. */
615 lmask.__bits[0] = frame.sf_sc.sc_mask;
616 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
617 lmask.__bits[i+1] = frame.sf_extramask[i];
618 linux_to_bsd_sigset(&lmask, &bmask);
619 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
620
621 /*
622 * Restore signal context.
623 */
624 regs->tf_rdi = frame.sf_sc.sc_edi;
625 regs->tf_rsi = frame.sf_sc.sc_esi;
626 regs->tf_rbp = frame.sf_sc.sc_ebp;
627 regs->tf_rbx = frame.sf_sc.sc_ebx;
628 regs->tf_rdx = frame.sf_sc.sc_edx;
629 regs->tf_rcx = frame.sf_sc.sc_ecx;
630 regs->tf_rax = frame.sf_sc.sc_eax;
631 regs->tf_rip = frame.sf_sc.sc_eip;
632 regs->tf_cs = frame.sf_sc.sc_cs;
633 regs->tf_ds = frame.sf_sc.sc_ds;
634 regs->tf_es = frame.sf_sc.sc_es;
635 regs->tf_fs = frame.sf_sc.sc_fs;
636 regs->tf_gs = frame.sf_sc.sc_gs;
637 regs->tf_rflags = eflags;
 /* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
638 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
639 regs->tf_ss = frame.sf_sc.sc_ss;
640 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
641
642 return (EJUSTRETURN);
643}
644
645/*
646 * System call to cleanup state after a signal
647 * has been taken. Reset signal mask and
648 * stack state from context left by rt_sendsig (above).
649 * Return to previous pc and psl as specified by
650 * context left by sendsig. Check carefully to
651 * make sure that the user has not modified the
652 * psl to gain improper privileges or to cause
653 * a machine fault.
 *
 * td:   calling thread; its trapframe is rewritten on success.
 * args: args->ucp is the user address of the struct l_ucontext.
 *
 * Returns EFAULT if the context cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the checks below (a bad %cs additionally raises
 * SIGBUS via trapsignal()), and EJUSTRETURN once the register state has
 * been restored.  The result of kern_sigaltstack() is ignored.
654 */
655int
656linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
657{
658 struct l_ucontext uc;
659 struct l_sigcontext *context;
660 sigset_t bmask;
661 l_stack_t *lss;
662 stack_t ss;
663 struct trapframe *regs;
664 int eflags;
665 ksiginfo_t ksi;
666
667 regs = td->td_frame;
668
669#ifdef DEBUG
670 if (ldebug(rt_sigreturn))
671 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
672#endif
673 /*
674 * The trampoline code hands us the ucontext.
675 * It is unsafe to keep track of it ourselves, in the event that a
676 * program jumps out of a signal handler.
677 */
678 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
679 return (EFAULT);
680
681 context = &uc.uc_mcontext;
682
683 /*
684 * Check for security violations.
 * Only bits within PSL_USERCHANGE may differ between the saved
 * flags and the flags currently in the trapframe.
685 */
686#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
687 eflags = context->sc_eflags;
688 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
689 return(EINVAL);
690
691 /*
692 * Don't allow users to load a valid privileged %cs. Let the
693 * hardware check for invalid selectors, excess privilege in
694 * other selectors, invalid %eip's and invalid %esp's.
695 */
696#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
697 if (!CS_SECURE(context->sc_cs)) {
698 ksiginfo_init_trap(&ksi);
699 ksi.ksi_signo = SIGBUS;
700 ksi.ksi_code = BUS_OBJERR;
701 ksi.ksi_trapno = T_PROTFLT;
702 ksi.ksi_addr = (void *)regs->tf_rip;
703 trapsignal(td, &ksi);
704 return(EINVAL);
705 }
706
707 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
708 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
709
710 /*
711 * Restore signal context
712 */
713 regs->tf_gs = context->sc_gs;
714 regs->tf_fs = context->sc_fs;
715 regs->tf_es = context->sc_es;
716 regs->tf_ds = context->sc_ds;
717 regs->tf_rdi = context->sc_edi;
718 regs->tf_rsi = context->sc_esi;
719 regs->tf_rbp = context->sc_ebp;
720 regs->tf_rbx = context->sc_ebx;
721 regs->tf_rdx = context->sc_edx;
722 regs->tf_rcx = context->sc_ecx;
723 regs->tf_rax = context->sc_eax;
724 regs->tf_rip = context->sc_eip;
725 regs->tf_cs = context->sc_cs;
726 regs->tf_rflags = eflags;
 /* The stack pointer comes from sc_esp_at_signal, not sc_esp. */
727 regs->tf_rsp = context->sc_esp_at_signal;
728 regs->tf_ss = context->sc_ss;
729 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
730
731 /*
732 * call sigaltstack & ignore results..
 * The Linux stack description saved in uc_stack is converted to
 * a native stack_t first.
733 */
734 lss = &uc.uc_stack;
735 ss.ss_sp = PTRIN(lss->ss_sp);
736 ss.ss_size = lss->ss_size;
737 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
738
739#ifdef DEBUG
740 if (ldebug(rt_sigreturn))
741 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
742 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
743#endif
744 (void)kern_sigaltstack(td, &ss, NULL);
745
746 return (EJUSTRETURN);
747}
748
749static int
750linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
751{
752 struct proc *p;
753 struct trapframe *frame;
754
755 p = td->td_proc;
756 frame = td->td_frame;
757
758 sa->args[0] = frame->tf_rbx;
759 sa->args[1] = frame->tf_rcx;
760 sa->args[2] = frame->tf_rdx;
761 sa->args[3] = frame->tf_rsi;
762 sa->args[4] = frame->tf_rdi;
763 sa->args[5] = frame->tf_rbp; /* Unconfirmed */
764 sa->code = frame->tf_rax;
765
766 if (sa->code >= p->p_sysent->sv_size)
35#include "opt_compat.h"
36
37#ifndef COMPAT_FREEBSD32
38#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39#endif
40
41#define __ELF_WORD_SIZE 32
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/exec.h>
46#include <sys/fcntl.h>
47#include <sys/imgact.h>
48#include <sys/imgact_elf.h>
49#include <sys/kernel.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/module.h>
53#include <sys/mutex.h>
54#include <sys/proc.h>
55#include <sys/resourcevar.h>
56#include <sys/signalvar.h>
57#include <sys/sysctl.h>
58#include <sys/syscallsubr.h>
59#include <sys/sysent.h>
60#include <sys/sysproto.h>
61#include <sys/vnode.h>
62#include <sys/eventhandler.h>
63
64#include <vm/vm.h>
65#include <vm/pmap.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_map.h>
68#include <vm/vm_object.h>
69#include <vm/vm_page.h>
70#include <vm/vm_param.h>
71
72#include <machine/cpu.h>
73#include <machine/md_var.h>
74#include <machine/pcb.h>
75#include <machine/specialreg.h>
76
77#include <amd64/linux32/linux.h>
78#include <amd64/linux32/linux32_proto.h>
79#include <compat/linux/linux_emul.h>
80#include <compat/linux/linux_futex.h>
81#include <compat/linux/linux_ioctl.h>
82#include <compat/linux/linux_mib.h>
83#include <compat/linux/linux_misc.h>
84#include <compat/linux/linux_signal.h>
85#include <compat/linux/linux_util.h>
86#include <compat/linux/linux_vdso.h>
87
88MODULE_VERSION(linux, 1);
89
/*
 * Emit one auxiliary-vector pair through suword32(): first `id`, then
 * `val`, each stored at `pos`, which is post-incremented after every
 * store.  `pos` is expanded twice, so it must be a plain pointer lvalue.
 * The return values of suword32() are discarded.
 */
90#define AUXARGS_ENTRY_32(pos, id, val) \
91 do { \
92 suword32(pos++, id); \
93 suword32(pos++, val); \
94 } while (0)
95
/*
 * The two characters '#' (0x23) and '!' (0x21) expressed as one 16-bit
 * constant, with the byte order chosen to match the host.
 */
96#if BYTE_ORDER == LITTLE_ENDIAN
97#define SHELLMAGIC 0x2123 /* #! */
98#else
99#define SHELLMAGIC 0x2321
100#endif
101
102/*
103 * Allow the sendsig functions to use the ldebug() facility
104 * even though they are not syscalls themselves. Map them
105 * to syscall 0. This is slightly less bogus than using
106 * ldebug(sigreturn).
 *
 * Both names below therefore expand to the constant 0.
107 */
108#define LINUX_SYS_linux_rt_sendsig 0
109#define LINUX_SYS_linux_sendsig 0
110
/* File-scope state; none of these objects is initialized in this part of the file. */
111const char *linux_kplatform;
112static int linux_szsigcode;
113static vm_object_t linux_shared_page_obj;
114static char *linux_shared_page_mapping;
/* NOTE(review): presumably linker-provided start/end markers of the embedded linux32 locore object - confirm. */
115extern char _binary_linux32_locore_o_start;
116extern char _binary_linux32_locore_o_end;
117
/* struct sysent array with LINUX_SYS_MAXSYSCALL entries, defined in another file. */
118extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
119
120SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
121
/* Forward declarations of functions that are static to this file. */
122static int elf_linux_fixup(register_t **stack_base,
123 struct image_params *iparams);
124static register_t *linux_copyout_strings(struct image_params *imgp);
125static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126static void exec_linux_setregs(struct thread *td,
127 struct image_params *imgp, u_long stack);
128static void linux32_fixlimit(struct rlimit *rl, int which);
129static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130static void linux_vdso_install(void *param);
131static void linux_vdso_deinstall(void *param);
132
133/*
134 * Linux syscalls return negative errno's, we do positive and map them
135 * Reference:
136 * FreeBSD: src/sys/sys/errno.h
137 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h
138 * linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is positional: the array index is the FreeBSD errno and the
 * stored value is the already-negated Linux errno.  Ten values per row,
 * so row k starts at FreeBSD errno 10 * k.
 *
 * NOTE(review): only 93 initializers (errnos 0..92) are listed while the
 * array is sized ELAST + 1.  If ELAST is greater than 92, the remaining
 * slots are implicitly zero and those FreeBSD errnos would translate to
 * 0 - confirm against sys/errno.h and extend the table if needed.
139 */
140static int bsd_to_linux_errno[ELAST + 1] = {
141 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9,
142 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
143 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
144 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
145 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
146 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
147 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
148 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9,
149 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74,
150 -72, -67, -71
151};
152
/*
 * Positional FreeBSD -> Linux signal translation table with 31
 * initializers, starting at LINUX_SIGHUP.  Two slots hold 0, i.e. no
 * Linux signal is listed for that position.
 * NOTE(review): presumably indexed by (FreeBSD signal number - 1) via
 * BSD_TO_LINUX_SIGNAL() - confirm against the macro's definition.
 */
153int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
154 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
155 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
156 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
157 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
158 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
159 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
160 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
161 0, LINUX_SIGUSR1, LINUX_SIGUSR2
162};
163
/*
 * Positional Linux -> FreeBSD signal translation table with 31
 * initializers, starting at SIGHUP.  SIGBUS and SIGURG each appear
 * twice, so two distinct positions collapse onto the same FreeBSD
 * signal.
 * NOTE(review): presumably indexed by (Linux signal number - 1) -
 * confirm against the lookup macro that consumes this table.
 */
164int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
165 SIGHUP, SIGINT, SIGQUIT, SIGILL,
166 SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
167 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
168 SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
169 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
170 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
171 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
172 SIGIO, SIGURG, SIGSYS
173};
174
/*
 * Positional map from a FreeBSD trap code (the array index, named in the
 * per-line comments) to the Linux trap number.  Indices without a listed
 * equivalent hold LINUX_T_UNKNOWN.
 */
175#define LINUX_T_UNKNOWN 255
176static int _bsd_to_linux_trapcode[] = {
177 LINUX_T_UNKNOWN, /* 0 */
178 6, /* 1 T_PRIVINFLT */
179 LINUX_T_UNKNOWN, /* 2 */
180 3, /* 3 T_BPTFLT */
181 LINUX_T_UNKNOWN, /* 4 */
182 LINUX_T_UNKNOWN, /* 5 */
183 16, /* 6 T_ARITHTRAP */
184 254, /* 7 T_ASTFLT */
185 LINUX_T_UNKNOWN, /* 8 */
186 13, /* 9 T_PROTFLT */
187 1, /* 10 T_TRCTRAP */
188 LINUX_T_UNKNOWN, /* 11 */
189 14, /* 12 T_PAGEFLT */
190 LINUX_T_UNKNOWN, /* 13 */
191 17, /* 14 T_ALIGNFLT */
192 LINUX_T_UNKNOWN, /* 15 */
193 LINUX_T_UNKNOWN, /* 16 */
194 LINUX_T_UNKNOWN, /* 17 */
195 0, /* 18 T_DIVIDE */
196 2, /* 19 T_NMI */
197 4, /* 20 T_OFLOW */
198 5, /* 21 T_BOUND */
199 7, /* 22 T_DNA */
200 8, /* 23 T_DOUBLEFLT */
201 9, /* 24 T_FPOPFLT */
202 10, /* 25 T_TSSFLT */
203 11, /* 26 T_SEGNPFLT */
204 12, /* 27 T_STKFLT */
205 18, /* 28 T_MCHK */
206 19, /* 29 T_XMMFLT */
207 15 /* 30 T_RESERVED */
208};
/*
 * Bounds-checked lookup in the table above: yields the table entry when
 * `code` is below the element count and LINUX_T_UNKNOWN otherwise.
 * `code` is evaluated twice.  Because the bound is a sizeof expression,
 * an int argument is compared as unsigned, so negative codes also fall
 * through to LINUX_T_UNKNOWN.
 */
209#define bsd_to_linux_trapcode(code) \
210 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
211 _bsd_to_linux_trapcode[(code)]: \
212 LINUX_T_UNKNOWN)
213
214struct linux32_ps_strings {
215 u_int32_t ps_argvstr; /* first of 0 or more argument strings */
216 u_int ps_nargvstr; /* the number of argument strings */
217 u_int32_t ps_envstr; /* first of 0 or more environment strings */
218 u_int ps_nenvstr; /* the number of environment strings */
219};
220
221LINUX_VDSO_SYM_INTPTR(linux32_sigcode);
222LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode);
223LINUX_VDSO_SYM_INTPTR(linux32_vsyscall);
224LINUX_VDSO_SYM_CHAR(linux_platform);
225
226/*
227 * If FreeBSD & Linux have a difference of opinion about what a trap
228 * means, deal with it here.
229 *
230 * MPSAFE
231 */
232static int
233translate_traps(int signal, int trap_code)
234{
235 if (signal != SIGBUS)
236 return signal;
237 switch (trap_code) {
238 case T_PROTFLT:
239 case T_TSSFLT:
240 case T_DOUBLEFLT:
241 case T_PAGEFLT:
242 return SIGSEGV;
243 default:
244 return signal;
245 }
246}
247
248static int
249elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
250{
251 Elf32_Auxargs *args;
252 Elf32_Addr *base;
253 Elf32_Addr *pos;
254 struct linux32_ps_strings *arginfo;
255
256 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
257
258 KASSERT(curthread->td_proc == imgp->proc,
259 ("unsafe elf_linux_fixup(), should be curproc"));
260 base = (Elf32_Addr *)*stack_base;
261 args = (Elf32_Auxargs *)imgp->auxargs;
262 pos = base + (imgp->args->argc + imgp->args->envc + 2);
263
264 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR,
265 imgp->proc->p_sysent->sv_shared_page_base);
266 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall);
267 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
268
269 /*
270 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
271 * as it has appeared in the 2.4.0-rc7 first time.
272 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
273 * glibc falls back to the hard-coded CLK_TCK value when aux entry
274 * is not present.
275 * Also see linux_times() implementation.
276 */
277 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
278 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
279 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
280 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
281 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
282 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
283 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
284 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
285 AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
286 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
287 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
288 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
289 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
290 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
291 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
292 AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary));
293 if (imgp->execpathp != 0)
294 AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp));
295 if (args->execfd != -1)
296 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
297 AUXARGS_ENTRY_32(pos, AT_NULL, 0);
298
299 free(imgp->auxargs, M_TEMP);
300 imgp->auxargs = NULL;
301
302 base--;
303 suword32(base, (uint32_t)imgp->args->argc);
304 *stack_base = (register_t *)base;
305 return (0);
306}
307
308static void
309linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
310{
311 struct thread *td = curthread;
312 struct proc *p = td->td_proc;
313 struct sigacts *psp;
314 struct trapframe *regs;
315 struct l_rt_sigframe *fp, frame;
316 int oonstack;
317 int sig;
318 int code;
319
320 sig = ksi->ksi_signo;
321 code = ksi->ksi_code;
322 PROC_LOCK_ASSERT(p, MA_OWNED);
323 psp = p->p_sigacts;
324 mtx_assert(&psp->ps_mtx, MA_OWNED);
325 regs = td->td_frame;
326 oonstack = sigonstack(regs->tf_rsp);
327
328#ifdef DEBUG
329 if (ldebug(rt_sendsig))
330 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
331 catcher, sig, (void*)mask, code);
332#endif
333 /*
334 * Allocate space for the signal handler context.
335 */
336 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
337 SIGISMEMBER(psp->ps_sigonstack, sig)) {
338 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
339 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
340 } else
341 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
342 mtx_unlock(&psp->ps_mtx);
343
344 /*
345 * Build the argument list for the signal handler.
346 */
347 sig = BSD_TO_LINUX_SIGNAL(sig);
348
349 bzero(&frame, sizeof(frame));
350
351 frame.sf_handler = PTROUT(catcher);
352 frame.sf_sig = sig;
353 frame.sf_siginfo = PTROUT(&fp->sf_si);
354 frame.sf_ucontext = PTROUT(&fp->sf_sc);
355
356 /* Fill in POSIX parts */
357 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
358
359 /*
360 * Build the signal context to be used by sigreturn
361 * and libgcc unwind.
362 */
363 frame.sf_sc.uc_flags = 0; /* XXX ??? */
364 frame.sf_sc.uc_link = 0; /* XXX ??? */
365
366 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
367 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
368 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
369 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
370 PROC_UNLOCK(p);
371
372 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
373
374 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
375 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
376 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
377 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
378 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx;
379 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp;
380 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx;
381 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx;
382 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
383 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
384 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
385 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
386 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
387 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
388 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
389 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
390 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
391 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
392 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err;
393 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
394 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
395
396#ifdef DEBUG
397 if (ldebug(rt_sendsig))
398 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
399 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
400 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
401#endif
402
403 if (copyout(&frame, fp, sizeof(frame)) != 0) {
404 /*
405 * Process has trashed its stack; give it an illegal
406 * instruction to halt it in its tracks.
407 */
408#ifdef DEBUG
409 if (ldebug(rt_sendsig))
410 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
411 fp, oonstack);
412#endif
413 PROC_LOCK(p);
414 sigexit(td, SIGILL);
415 }
416
417 /*
418 * Build context to run handler in.
419 */
420 regs->tf_rsp = PTROUT(fp);
421 regs->tf_rip = linux32_rt_sigcode;
422 regs->tf_rflags &= ~(PSL_T | PSL_D);
423 regs->tf_cs = _ucode32sel;
424 regs->tf_ss = _udatasel;
425 regs->tf_ds = _udatasel;
426 regs->tf_es = _udatasel;
427 regs->tf_fs = _ufssel;
428 regs->tf_gs = _ugssel;
429 regs->tf_flags = TF_HASSEGS;
430 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
431 PROC_LOCK(p);
432 mtx_lock(&psp->ps_mtx);
433}
434
435
436/*
437 * Send an interrupt to process.
438 *
439 * Stack is set up to allow sigcode stored
440 * in u. to call routine, followed by kcall
441 * to sigreturn routine below. After sigreturn
442 * resets the signal mask, the stack, and the
443 * frame pointer, it returns to the user
444 * specified pc, psl.
445 */
446static void
447linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
448{
449 struct thread *td = curthread;
450 struct proc *p = td->td_proc;
451 struct sigacts *psp;
452 struct trapframe *regs;
453 struct l_sigframe *fp, frame;
454 l_sigset_t lmask;
455 int oonstack, i;
456 int sig, code;
457
458 sig = ksi->ksi_signo;
459 code = ksi->ksi_code;
460 PROC_LOCK_ASSERT(p, MA_OWNED);
461 psp = p->p_sigacts;
462 mtx_assert(&psp->ps_mtx, MA_OWNED);
463 if (SIGISMEMBER(psp->ps_siginfo, sig)) {
464 /* Signal handler installed with SA_SIGINFO. */
465 linux_rt_sendsig(catcher, ksi, mask);
466 return;
467 }
468
469 regs = td->td_frame;
470 oonstack = sigonstack(regs->tf_rsp);
471
472#ifdef DEBUG
473 if (ldebug(sendsig))
474 printf(ARGS(sendsig, "%p, %d, %p, %u"),
475 catcher, sig, (void*)mask, code);
476#endif
477
478 /*
479 * Allocate space for the signal handler context.
480 */
481 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
482 SIGISMEMBER(psp->ps_sigonstack, sig)) {
483 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
484 td->td_sigstk.ss_size - sizeof(struct l_sigframe));
485 } else
486 fp = (struct l_sigframe *)regs->tf_rsp - 1;
487 mtx_unlock(&psp->ps_mtx);
488 PROC_UNLOCK(p);
489
490 /*
491 * Build the argument list for the signal handler.
492 */
493 sig = BSD_TO_LINUX_SIGNAL(sig);
494
495 bzero(&frame, sizeof(frame));
496
497 frame.sf_handler = PTROUT(catcher);
498 frame.sf_sig = sig;
499
500 bsd_to_linux_sigset(mask, &lmask);
501
502 /*
503 * Build the signal context to be used by sigreturn.
504 */
505 frame.sf_sc.sc_mask = lmask.__bits[0];
506 frame.sf_sc.sc_gs = regs->tf_gs;
507 frame.sf_sc.sc_fs = regs->tf_fs;
508 frame.sf_sc.sc_es = regs->tf_es;
509 frame.sf_sc.sc_ds = regs->tf_ds;
510 frame.sf_sc.sc_edi = regs->tf_rdi;
511 frame.sf_sc.sc_esi = regs->tf_rsi;
512 frame.sf_sc.sc_ebp = regs->tf_rbp;
513 frame.sf_sc.sc_ebx = regs->tf_rbx;
514 frame.sf_sc.sc_esp = regs->tf_rsp;
515 frame.sf_sc.sc_edx = regs->tf_rdx;
516 frame.sf_sc.sc_ecx = regs->tf_rcx;
517 frame.sf_sc.sc_eax = regs->tf_rax;
518 frame.sf_sc.sc_eip = regs->tf_rip;
519 frame.sf_sc.sc_cs = regs->tf_cs;
520 frame.sf_sc.sc_eflags = regs->tf_rflags;
521 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
522 frame.sf_sc.sc_ss = regs->tf_ss;
523 frame.sf_sc.sc_err = regs->tf_err;
524 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr;
525 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
526
527 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
528 frame.sf_extramask[i] = lmask.__bits[i+1];
529
530 if (copyout(&frame, fp, sizeof(frame)) != 0) {
531 /*
532 * Process has trashed its stack; give it an illegal
533 * instruction to halt it in its tracks.
534 */
535 PROC_LOCK(p);
536 sigexit(td, SIGILL);
537 }
538
539 /*
540 * Build context to run handler in.
541 */
542 regs->tf_rsp = PTROUT(fp);
543 regs->tf_rip = linux32_sigcode;
544 regs->tf_rflags &= ~(PSL_T | PSL_D);
545 regs->tf_cs = _ucode32sel;
546 regs->tf_ss = _udatasel;
547 regs->tf_ds = _udatasel;
548 regs->tf_es = _udatasel;
549 regs->tf_fs = _ufssel;
550 regs->tf_gs = _ugssel;
551 regs->tf_flags = TF_HASSEGS;
552 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
553 PROC_LOCK(p);
554 mtx_lock(&psp->ps_mtx);
555}
556
557/*
558 * System call to cleanup state after a signal
559 * has been taken. Reset signal mask and
560 * stack state from context left by sendsig (above).
561 * Return to previous pc and psl as specified by
562 * context left by sendsig. Check carefully to
563 * make sure that the user has not modified the
564 * psl to gain improper privileges or to cause
565 * a machine fault.
566 */
567int
568linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
569{
570 struct l_sigframe frame;
571 struct trapframe *regs;
572 sigset_t bmask;
573 l_sigset_t lmask;
574 int eflags, i;
575 ksiginfo_t ksi;
576
577 regs = td->td_frame;
578
579#ifdef DEBUG
580 if (ldebug(sigreturn))
581 printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
582#endif
583 /*
584 * The trampoline code hands us the sigframe.
585 * It is unsafe to keep track of it ourselves, in the event that a
586 * program jumps out of a signal handler.
587 */
588 if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
589 return (EFAULT);
590
591 /*
592 * Check for security violations.
593 */
594#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
595 eflags = frame.sf_sc.sc_eflags;
596 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
597 return(EINVAL);
598
599 /*
600 * Don't allow users to load a valid privileged %cs. Let the
601 * hardware check for invalid selectors, excess privilege in
602 * other selectors, invalid %eip's and invalid %esp's.
603 */
604#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
605 if (!CS_SECURE(frame.sf_sc.sc_cs)) {
606 ksiginfo_init_trap(&ksi);
607 ksi.ksi_signo = SIGBUS;
608 ksi.ksi_code = BUS_OBJERR;
609 ksi.ksi_trapno = T_PROTFLT;
610 ksi.ksi_addr = (void *)regs->tf_rip;
611 trapsignal(td, &ksi);
612 return(EINVAL);
613 }
614
615 lmask.__bits[0] = frame.sf_sc.sc_mask;
616 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
617 lmask.__bits[i+1] = frame.sf_extramask[i];
618 linux_to_bsd_sigset(&lmask, &bmask);
619 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
620
621 /*
622 * Restore signal context.
623 */
624 regs->tf_rdi = frame.sf_sc.sc_edi;
625 regs->tf_rsi = frame.sf_sc.sc_esi;
626 regs->tf_rbp = frame.sf_sc.sc_ebp;
627 regs->tf_rbx = frame.sf_sc.sc_ebx;
628 regs->tf_rdx = frame.sf_sc.sc_edx;
629 regs->tf_rcx = frame.sf_sc.sc_ecx;
630 regs->tf_rax = frame.sf_sc.sc_eax;
631 regs->tf_rip = frame.sf_sc.sc_eip;
632 regs->tf_cs = frame.sf_sc.sc_cs;
633 regs->tf_ds = frame.sf_sc.sc_ds;
634 regs->tf_es = frame.sf_sc.sc_es;
635 regs->tf_fs = frame.sf_sc.sc_fs;
636 regs->tf_gs = frame.sf_sc.sc_gs;
637 regs->tf_rflags = eflags;
638 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
639 regs->tf_ss = frame.sf_sc.sc_ss;
640 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
641
642 return (EJUSTRETURN);
643}
644
645/*
646 * System call to cleanup state after a signal
647 * has been taken. Reset signal mask and
648 * stack state from context left by rt_sendsig (above).
649 * Return to previous pc and psl as specified by
650 * context left by sendsig. Check carefully to
651 * make sure that the user has not modified the
652 * psl to gain improper privileges or to cause
653 * a machine fault.
654 */
655int
656linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
657{
658 struct l_ucontext uc;
659 struct l_sigcontext *context;
660 sigset_t bmask;
661 l_stack_t *lss;
662 stack_t ss;
663 struct trapframe *regs;
664 int eflags;
665 ksiginfo_t ksi;
666
667 regs = td->td_frame;
668
669#ifdef DEBUG
670 if (ldebug(rt_sigreturn))
671 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
672#endif
673 /*
674 * The trampoline code hands us the ucontext.
675 * It is unsafe to keep track of it ourselves, in the event that a
676 * program jumps out of a signal handler.
677 */
678 if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
679 return (EFAULT);
680
681 context = &uc.uc_mcontext;
682
683 /*
684 * Check for security violations.
685 */
686#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
687 eflags = context->sc_eflags;
688 if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
689 return(EINVAL);
690
691 /*
692 * Don't allow users to load a valid privileged %cs. Let the
693 * hardware check for invalid selectors, excess privilege in
694 * other selectors, invalid %eip's and invalid %esp's.
695 */
696#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
697 if (!CS_SECURE(context->sc_cs)) {
698 ksiginfo_init_trap(&ksi);
699 ksi.ksi_signo = SIGBUS;
700 ksi.ksi_code = BUS_OBJERR;
701 ksi.ksi_trapno = T_PROTFLT;
702 ksi.ksi_addr = (void *)regs->tf_rip;
703 trapsignal(td, &ksi);
704 return(EINVAL);
705 }
706
707 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
708 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
709
710 /*
711 * Restore signal context
712 */
713 regs->tf_gs = context->sc_gs;
714 regs->tf_fs = context->sc_fs;
715 regs->tf_es = context->sc_es;
716 regs->tf_ds = context->sc_ds;
717 regs->tf_rdi = context->sc_edi;
718 regs->tf_rsi = context->sc_esi;
719 regs->tf_rbp = context->sc_ebp;
720 regs->tf_rbx = context->sc_ebx;
721 regs->tf_rdx = context->sc_edx;
722 regs->tf_rcx = context->sc_ecx;
723 regs->tf_rax = context->sc_eax;
724 regs->tf_rip = context->sc_eip;
725 regs->tf_cs = context->sc_cs;
726 regs->tf_rflags = eflags;
727 regs->tf_rsp = context->sc_esp_at_signal;
728 regs->tf_ss = context->sc_ss;
729 set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
730
731 /*
732 * call sigaltstack & ignore results..
733 */
734 lss = &uc.uc_stack;
735 ss.ss_sp = PTRIN(lss->ss_sp);
736 ss.ss_size = lss->ss_size;
737 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
738
739#ifdef DEBUG
740 if (ldebug(rt_sigreturn))
741 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
742 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
743#endif
744 (void)kern_sigaltstack(td, &ss, NULL);
745
746 return (EJUSTRETURN);
747}
748
749static int
750linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
751{
752 struct proc *p;
753 struct trapframe *frame;
754
755 p = td->td_proc;
756 frame = td->td_frame;
757
758 sa->args[0] = frame->tf_rbx;
759 sa->args[1] = frame->tf_rcx;
760 sa->args[2] = frame->tf_rdx;
761 sa->args[3] = frame->tf_rsi;
762 sa->args[4] = frame->tf_rdi;
763 sa->args[5] = frame->tf_rbp; /* Unconfirmed */
764 sa->code = frame->tf_rax;
765
766 if (sa->code >= p->p_sysent->sv_size)
767 sa->callp = &p->p_sysent->sv_table[0];
767 /* nosys */
768 sa->callp = &p->p_sysent->sv_table[LINUX_SYS_MAXSYSCALL];
768 else
769 sa->callp = &p->p_sysent->sv_table[sa->code];
770 sa->narg = sa->callp->sy_narg;
771
772 td->td_retval[0] = 0;
773 td->td_retval[1] = frame->tf_rdx;
774
775 return (0);
776}
777
778/*
779 * If a linux binary is exec'ing something, try this image activator
780 * first. We override standard shell script execution in order to
781 * be able to modify the interpreter path. We only do this if a linux
782 * binary is doing the exec, so we do not create an EXEC module for it.
783 */
784static int exec_linux_imgact_try(struct image_params *iparams);
785
786static int
787exec_linux_imgact_try(struct image_params *imgp)
788{
789 const char *head = (const char *)imgp->image_header;
790 char *rpath;
791 int error = -1;
792
793 /*
794 * The interpreter for shell scripts run from a linux binary needs
795 * to be located in /compat/linux if possible in order to recursively
796 * maintain linux path emulation.
797 */
798 if (((const short *)head)[0] == SHELLMAGIC) {
799 /*
800 * Run our normal shell image activator. If it succeeds attempt
801 * to use the alternate path for the interpreter. If an
802 * alternate * path is found, use our stringspace to store it.
803 */
804 if ((error = exec_shell_imgact(imgp)) == 0) {
805 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
806 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
807 AT_FDCWD);
808 if (rpath != NULL)
809 imgp->args->fname_buf =
810 imgp->interpreter_name = rpath;
811 }
812 }
813 return (error);
814}
815
816/*
817 * Clear registers on exec
818 * XXX copied from ia32_signal.c.
819 */
820static void
821exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
822{
823 struct trapframe *regs = td->td_frame;
824 struct pcb *pcb = td->td_pcb;
825
826 mtx_lock(&dt_lock);
827 if (td->td_proc->p_md.md_ldt != NULL)
828 user_ldt_free(td);
829 else
830 mtx_unlock(&dt_lock);
831
832 critical_enter();
833 wrmsr(MSR_FSBASE, 0);
834 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
835 pcb->pcb_fsbase = 0;
836 pcb->pcb_gsbase = 0;
837 critical_exit();
838 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
839
840 bzero((char *)regs, sizeof(struct trapframe));
841 regs->tf_rip = imgp->entry_addr;
842 regs->tf_rsp = stack;
843 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
844 regs->tf_gs = _ugssel;
845 regs->tf_fs = _ufssel;
846 regs->tf_es = _udatasel;
847 regs->tf_ds = _udatasel;
848 regs->tf_ss = _udatasel;
849 regs->tf_flags = TF_HASSEGS;
850 regs->tf_cs = _ucode32sel;
851 regs->tf_rbx = imgp->ps_strings;
852
853 fpstate_drop(td);
854
855 /* Do full restore on return so that we can change to a different %cs */
856 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
857 td->td_retval[1] = 0;
858}
859
860/*
861 * XXX copied from ia32_sysvec.c.
862 */
863static register_t *
864linux_copyout_strings(struct image_params *imgp)
865{
866 int argc, envc;
867 u_int32_t *vectp;
868 char *stringp, *destp;
869 u_int32_t *stack_base;
870 struct linux32_ps_strings *arginfo;
871 char canary[LINUX_AT_RANDOM_LEN];
872 size_t execpath_len;
873
874 /*
875 * Calculate string base and vector table pointers.
876 */
877 if (imgp->execpath != NULL && imgp->auxargs != NULL)
878 execpath_len = strlen(imgp->execpath) + 1;
879 else
880 execpath_len = 0;
881
882 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
883 destp = (caddr_t)arginfo - SPARE_USRSPACE -
884 roundup(sizeof(canary), sizeof(char *)) -
885 roundup(execpath_len, sizeof(char *)) -
886 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
887
888 if (execpath_len != 0) {
889 imgp->execpathp = (uintptr_t)arginfo - execpath_len;
890 copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
891 }
892
893 /*
894 * Prepare the canary for SSP.
895 */
896 arc4rand(canary, sizeof(canary), 0);
897 imgp->canary = (uintptr_t)arginfo -
898 roundup(execpath_len, sizeof(char *)) -
899 roundup(sizeof(canary), sizeof(char *));
900 copyout(canary, (void *)imgp->canary, sizeof(canary));
901
902 /*
903 * If we have a valid auxargs ptr, prepare some room
904 * on the stack.
905 */
906 if (imgp->auxargs) {
907 /*
908 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
909 * lower compatibility.
910 */
911 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
912 (LINUX_AT_COUNT * 2);
913 /*
914 * The '+ 2' is for the null pointers at the end of each of
915 * the arg and env vector sets,and imgp->auxarg_size is room
916 * for argument of Runtime loader.
917 */
918 vectp = (u_int32_t *) (destp - (imgp->args->argc +
919 imgp->args->envc + 2 + imgp->auxarg_size) *
920 sizeof(u_int32_t));
921
922 } else
923 /*
924 * The '+ 2' is for the null pointers at the end of each of
925 * the arg and env vector sets
926 */
927 vectp = (u_int32_t *)(destp - (imgp->args->argc +
928 imgp->args->envc + 2) * sizeof(u_int32_t));
929
930 /*
931 * vectp also becomes our initial stack base
932 */
933 stack_base = vectp;
934
935 stringp = imgp->args->begin_argv;
936 argc = imgp->args->argc;
937 envc = imgp->args->envc;
938 /*
939 * Copy out strings - arguments and environment.
940 */
941 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
942
943 /*
944 * Fill in "ps_strings" struct for ps, w, etc.
945 */
946 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
947 suword32(&arginfo->ps_nargvstr, argc);
948
949 /*
950 * Fill in argument portion of vector table.
951 */
952 for (; argc > 0; --argc) {
953 suword32(vectp++, (uint32_t)(intptr_t)destp);
954 while (*stringp++ != 0)
955 destp++;
956 destp++;
957 }
958
959 /* a null vector table pointer separates the argp's from the envp's */
960 suword32(vectp++, 0);
961
962 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
963 suword32(&arginfo->ps_nenvstr, envc);
964
965 /*
966 * Fill in environment portion of vector table.
967 */
968 for (; envc > 0; --envc) {
969 suword32(vectp++, (uint32_t)(intptr_t)destp);
970 while (*stringp++ != 0)
971 destp++;
972 destp++;
973 }
974
975 /* end of vector table is a null pointer */
976 suword32(vectp, 0);
977
978 return ((register_t *)stack_base);
979}
980
981static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
982 "32-bit Linux emulation");
983
984static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
985SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
986 &linux32_maxdsiz, 0, "");
987static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
988SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
989 &linux32_maxssiz, 0, "");
990static u_long linux32_maxvmem = LINUX32_MAXVMEM;
991SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
992 &linux32_maxvmem, 0, "");
993
994#if defined(DEBUG)
995SYSCTL_PROC(_compat_linux32, OID_AUTO, debug,
996 CTLTYPE_STRING | CTLFLAG_RW,
997 0, 0, linux_sysctl_debug, "A",
998 "Linux debugging control");
999#endif
1000
1001static void
1002linux32_fixlimit(struct rlimit *rl, int which)
1003{
1004
1005 switch (which) {
1006 case RLIMIT_DATA:
1007 if (linux32_maxdsiz != 0) {
1008 if (rl->rlim_cur > linux32_maxdsiz)
1009 rl->rlim_cur = linux32_maxdsiz;
1010 if (rl->rlim_max > linux32_maxdsiz)
1011 rl->rlim_max = linux32_maxdsiz;
1012 }
1013 break;
1014 case RLIMIT_STACK:
1015 if (linux32_maxssiz != 0) {
1016 if (rl->rlim_cur > linux32_maxssiz)
1017 rl->rlim_cur = linux32_maxssiz;
1018 if (rl->rlim_max > linux32_maxssiz)
1019 rl->rlim_max = linux32_maxssiz;
1020 }
1021 break;
1022 case RLIMIT_VMEM:
1023 if (linux32_maxvmem != 0) {
1024 if (rl->rlim_cur > linux32_maxvmem)
1025 rl->rlim_cur = linux32_maxvmem;
1026 if (rl->rlim_max > linux32_maxvmem)
1027 rl->rlim_max = linux32_maxvmem;
1028 }
1029 break;
1030 }
1031}
1032
1033struct sysentvec elf_linux_sysvec = {
1034 .sv_size = LINUX_SYS_MAXSYSCALL,
1035 .sv_table = linux_sysent,
1036 .sv_mask = 0,
1037 .sv_sigsize = LINUX_SIGTBLSZ,
1038 .sv_sigtbl = bsd_to_linux_signal,
1039 .sv_errsize = ELAST + 1,
1040 .sv_errtbl = bsd_to_linux_errno,
1041 .sv_transtrap = translate_traps,
1042 .sv_fixup = elf_linux_fixup,
1043 .sv_sendsig = linux_sendsig,
1044 .sv_sigcode = &_binary_linux32_locore_o_start,
1045 .sv_szsigcode = &linux_szsigcode,
1046 .sv_prepsyscall = NULL,
1047 .sv_name = "Linux ELF32",
1048 .sv_coredump = elf32_coredump,
1049 .sv_imgact_try = exec_linux_imgact_try,
1050 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1051 .sv_pagesize = PAGE_SIZE,
1052 .sv_minuser = VM_MIN_ADDRESS,
1053 .sv_maxuser = LINUX32_MAXUSER,
1054 .sv_usrstack = LINUX32_USRSTACK,
1055 .sv_psstrings = LINUX32_PS_STRINGS,
1056 .sv_stackprot = VM_PROT_ALL,
1057 .sv_copyout_strings = linux_copyout_strings,
1058 .sv_setregs = exec_linux_setregs,
1059 .sv_fixlimit = linux32_fixlimit,
1060 .sv_maxssiz = &linux32_maxssiz,
1061 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1062 .sv_set_syscall_retval = cpu_set_syscall_retval,
1063 .sv_fetch_syscall_args = linux32_fetch_syscall_args,
1064 .sv_syscallnames = NULL,
1065 .sv_shared_page_base = LINUX32_SHAREDPAGE,
1066 .sv_shared_page_len = PAGE_SIZE,
1067 .sv_schedtail = linux_schedtail,
1068 .sv_thread_detach = linux_thread_detach,
1069};
1070
1071static void
1072linux_vdso_install(void *param)
1073{
1074
1075 linux_szsigcode = (&_binary_linux32_locore_o_end -
1076 &_binary_linux32_locore_o_start);
1077
1078 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1079 panic("Linux invalid vdso size\n");
1080
1081 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1082
1083 linux_shared_page_obj = __elfN(linux_shared_page_init)
1084 (&linux_shared_page_mapping);
1085
1086 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);
1087
1088 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1089 linux_szsigcode);
1090 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1091
1092 linux_kplatform = linux_shared_page_mapping +
1093 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
1094}
1095SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1096 (sysinit_cfunc_t)linux_vdso_install, NULL);
1097
1098static void
1099linux_vdso_deinstall(void *param)
1100{
1101
1102 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1103};
1104SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1105 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1106
1107static char GNU_ABI_VENDOR[] = "GNU";
1108static int GNULINUX_ABI_DESC = 0;
1109
1110static boolean_t
1111linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1112{
1113 const Elf32_Word *desc;
1114 uintptr_t p;
1115
1116 p = (uintptr_t)(note + 1);
1117 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1118
1119 desc = (const Elf32_Word *)p;
1120 if (desc[0] != GNULINUX_ABI_DESC)
1121 return (FALSE);
1122
1123 /*
1124 * For linux we encode osrel as follows (see linux_mib.c):
1125 * VVVMMMIII (version, major, minor), see linux_mib.c.
1126 */
1127 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1128
1129 return (TRUE);
1130}
1131
1132static Elf_Brandnote linux32_brandnote = {
1133 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1134 .hdr.n_descsz = 16, /* XXX at least 16 */
1135 .hdr.n_type = 1,
1136 .vendor = GNU_ABI_VENDOR,
1137 .flags = BN_TRANSLATE_OSREL,
1138 .trans_osrel = linux32_trans_osrel
1139};
1140
1141static Elf32_Brandinfo linux_brand = {
1142 .brand = ELFOSABI_LINUX,
1143 .machine = EM_386,
1144 .compat_3_brand = "Linux",
1145 .emul_path = "/compat/linux",
1146 .interp_path = "/lib/ld-linux.so.1",
1147 .sysvec = &elf_linux_sysvec,
1148 .interp_newpath = NULL,
1149 .brand_note = &linux32_brandnote,
1150 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1151};
1152
1153static Elf32_Brandinfo linux_glibc2brand = {
1154 .brand = ELFOSABI_LINUX,
1155 .machine = EM_386,
1156 .compat_3_brand = "Linux",
1157 .emul_path = "/compat/linux",
1158 .interp_path = "/lib/ld-linux.so.2",
1159 .sysvec = &elf_linux_sysvec,
1160 .interp_newpath = NULL,
1161 .brand_note = &linux32_brandnote,
1162 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1163};
1164
1165Elf32_Brandinfo *linux_brandlist[] = {
1166 &linux_brand,
1167 &linux_glibc2brand,
1168 NULL
1169};
1170
1171static int
1172linux_elf_modevent(module_t mod, int type, void *data)
1173{
1174 Elf32_Brandinfo **brandinfo;
1175 int error;
1176 struct linux_ioctl_handler **lihp;
1177
1178 error = 0;
1179
1180 switch(type) {
1181 case MOD_LOAD:
1182 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1183 ++brandinfo)
1184 if (elf32_insert_brand_entry(*brandinfo) < 0)
1185 error = EINVAL;
1186 if (error == 0) {
1187 SET_FOREACH(lihp, linux_ioctl_handler_set)
1188 linux_ioctl_register_handler(*lihp);
1189 LIST_INIT(&futex_list);
1190 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1191 stclohz = (stathz ? stathz : hz);
1192 if (bootverbose)
1193 printf("Linux ELF exec handler installed\n");
1194 } else
1195 printf("cannot insert Linux ELF brand handler\n");
1196 break;
1197 case MOD_UNLOAD:
1198 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1199 ++brandinfo)
1200 if (elf32_brand_inuse(*brandinfo))
1201 error = EBUSY;
1202 if (error == 0) {
1203 for (brandinfo = &linux_brandlist[0];
1204 *brandinfo != NULL; ++brandinfo)
1205 if (elf32_remove_brand_entry(*brandinfo) < 0)
1206 error = EINVAL;
1207 }
1208 if (error == 0) {
1209 SET_FOREACH(lihp, linux_ioctl_handler_set)
1210 linux_ioctl_unregister_handler(*lihp);
1211 mtx_destroy(&futex_mtx);
1212 if (bootverbose)
1213 printf("Linux ELF exec handler removed\n");
1214 } else
1215 printf("Could not deinstall ELF interpreter entry\n");
1216 break;
1217 default:
1218 return (EOPNOTSUPP);
1219 }
1220 return (error);
1221}
1222
1223static moduledata_t linux_elf_mod = {
1224 "linuxelf",
1225 linux_elf_modevent,
1226 0
1227};
1228
1229DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1230MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1);
769 else
770 sa->callp = &p->p_sysent->sv_table[sa->code];
771 sa->narg = sa->callp->sy_narg;
772
773 td->td_retval[0] = 0;
774 td->td_retval[1] = frame->tf_rdx;
775
776 return (0);
777}
778
779/*
780 * If a linux binary is exec'ing something, try this image activator
781 * first. We override standard shell script execution in order to
782 * be able to modify the interpreter path. We only do this if a linux
783 * binary is doing the exec, so we do not create an EXEC module for it.
784 */
785static int exec_linux_imgact_try(struct image_params *iparams);
786
787static int
788exec_linux_imgact_try(struct image_params *imgp)
789{
790 const char *head = (const char *)imgp->image_header;
791 char *rpath;
792 int error = -1;
793
794 /*
795 * The interpreter for shell scripts run from a linux binary needs
796 * to be located in /compat/linux if possible in order to recursively
797 * maintain linux path emulation.
798 */
799 if (((const short *)head)[0] == SHELLMAGIC) {
800 /*
801 * Run our normal shell image activator. If it succeeds attempt
802 * to use the alternate path for the interpreter. If an
803 * alternate * path is found, use our stringspace to store it.
804 */
805 if ((error = exec_shell_imgact(imgp)) == 0) {
806 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
807 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
808 AT_FDCWD);
809 if (rpath != NULL)
810 imgp->args->fname_buf =
811 imgp->interpreter_name = rpath;
812 }
813 }
814 return (error);
815}
816
817/*
818 * Clear registers on exec
819 * XXX copied from ia32_signal.c.
820 */
821static void
822exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
823{
824 struct trapframe *regs = td->td_frame;
825 struct pcb *pcb = td->td_pcb;
826
827 mtx_lock(&dt_lock);
828 if (td->td_proc->p_md.md_ldt != NULL)
829 user_ldt_free(td);
830 else
831 mtx_unlock(&dt_lock);
832
833 critical_enter();
834 wrmsr(MSR_FSBASE, 0);
835 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
836 pcb->pcb_fsbase = 0;
837 pcb->pcb_gsbase = 0;
838 critical_exit();
839 pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
840
841 bzero((char *)regs, sizeof(struct trapframe));
842 regs->tf_rip = imgp->entry_addr;
843 regs->tf_rsp = stack;
844 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
845 regs->tf_gs = _ugssel;
846 regs->tf_fs = _ufssel;
847 regs->tf_es = _udatasel;
848 regs->tf_ds = _udatasel;
849 regs->tf_ss = _udatasel;
850 regs->tf_flags = TF_HASSEGS;
851 regs->tf_cs = _ucode32sel;
852 regs->tf_rbx = imgp->ps_strings;
853
854 fpstate_drop(td);
855
856 /* Do full restore on return so that we can change to a different %cs */
857 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
858 td->td_retval[1] = 0;
859}
860
861/*
862 * XXX copied from ia32_sysvec.c.
863 */
864static register_t *
865linux_copyout_strings(struct image_params *imgp)
866{
867 int argc, envc;
868 u_int32_t *vectp;
869 char *stringp, *destp;
870 u_int32_t *stack_base;
871 struct linux32_ps_strings *arginfo;
872 char canary[LINUX_AT_RANDOM_LEN];
873 size_t execpath_len;
874
875 /*
876 * Calculate string base and vector table pointers.
877 */
878 if (imgp->execpath != NULL && imgp->auxargs != NULL)
879 execpath_len = strlen(imgp->execpath) + 1;
880 else
881 execpath_len = 0;
882
883 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
884 destp = (caddr_t)arginfo - SPARE_USRSPACE -
885 roundup(sizeof(canary), sizeof(char *)) -
886 roundup(execpath_len, sizeof(char *)) -
887 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
888
889 if (execpath_len != 0) {
890 imgp->execpathp = (uintptr_t)arginfo - execpath_len;
891 copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
892 }
893
894 /*
895 * Prepare the canary for SSP.
896 */
897 arc4rand(canary, sizeof(canary), 0);
898 imgp->canary = (uintptr_t)arginfo -
899 roundup(execpath_len, sizeof(char *)) -
900 roundup(sizeof(canary), sizeof(char *));
901 copyout(canary, (void *)imgp->canary, sizeof(canary));
902
903 /*
904 * If we have a valid auxargs ptr, prepare some room
905 * on the stack.
906 */
907 if (imgp->auxargs) {
908 /*
909 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
910 * lower compatibility.
911 */
912 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
913 (LINUX_AT_COUNT * 2);
914 /*
915 * The '+ 2' is for the null pointers at the end of each of
916 * the arg and env vector sets,and imgp->auxarg_size is room
917 * for argument of Runtime loader.
918 */
919 vectp = (u_int32_t *) (destp - (imgp->args->argc +
920 imgp->args->envc + 2 + imgp->auxarg_size) *
921 sizeof(u_int32_t));
922
923 } else
924 /*
925 * The '+ 2' is for the null pointers at the end of each of
926 * the arg and env vector sets
927 */
928 vectp = (u_int32_t *)(destp - (imgp->args->argc +
929 imgp->args->envc + 2) * sizeof(u_int32_t));
930
931 /*
932 * vectp also becomes our initial stack base
933 */
934 stack_base = vectp;
935
936 stringp = imgp->args->begin_argv;
937 argc = imgp->args->argc;
938 envc = imgp->args->envc;
939 /*
940 * Copy out strings - arguments and environment.
941 */
942 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
943
944 /*
945 * Fill in "ps_strings" struct for ps, w, etc.
946 */
947 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
948 suword32(&arginfo->ps_nargvstr, argc);
949
950 /*
951 * Fill in argument portion of vector table.
952 */
953 for (; argc > 0; --argc) {
954 suword32(vectp++, (uint32_t)(intptr_t)destp);
955 while (*stringp++ != 0)
956 destp++;
957 destp++;
958 }
959
960 /* a null vector table pointer separates the argp's from the envp's */
961 suword32(vectp++, 0);
962
963 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
964 suword32(&arginfo->ps_nenvstr, envc);
965
966 /*
967 * Fill in environment portion of vector table.
968 */
969 for (; envc > 0; --envc) {
970 suword32(vectp++, (uint32_t)(intptr_t)destp);
971 while (*stringp++ != 0)
972 destp++;
973 destp++;
974 }
975
976 /* end of vector table is a null pointer */
977 suword32(vectp, 0);
978
979 return ((register_t *)stack_base);
980}
981
982static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
983 "32-bit Linux emulation");
984
985static u_long linux32_maxdsiz = LINUX32_MAXDSIZ;
986SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
987 &linux32_maxdsiz, 0, "");
988static u_long linux32_maxssiz = LINUX32_MAXSSIZ;
989SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
990 &linux32_maxssiz, 0, "");
991static u_long linux32_maxvmem = LINUX32_MAXVMEM;
992SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
993 &linux32_maxvmem, 0, "");
994
995#if defined(DEBUG)
996SYSCTL_PROC(_compat_linux32, OID_AUTO, debug,
997 CTLTYPE_STRING | CTLFLAG_RW,
998 0, 0, linux_sysctl_debug, "A",
999 "Linux debugging control");
1000#endif
1001
1002static void
1003linux32_fixlimit(struct rlimit *rl, int which)
1004{
1005
1006 switch (which) {
1007 case RLIMIT_DATA:
1008 if (linux32_maxdsiz != 0) {
1009 if (rl->rlim_cur > linux32_maxdsiz)
1010 rl->rlim_cur = linux32_maxdsiz;
1011 if (rl->rlim_max > linux32_maxdsiz)
1012 rl->rlim_max = linux32_maxdsiz;
1013 }
1014 break;
1015 case RLIMIT_STACK:
1016 if (linux32_maxssiz != 0) {
1017 if (rl->rlim_cur > linux32_maxssiz)
1018 rl->rlim_cur = linux32_maxssiz;
1019 if (rl->rlim_max > linux32_maxssiz)
1020 rl->rlim_max = linux32_maxssiz;
1021 }
1022 break;
1023 case RLIMIT_VMEM:
1024 if (linux32_maxvmem != 0) {
1025 if (rl->rlim_cur > linux32_maxvmem)
1026 rl->rlim_cur = linux32_maxvmem;
1027 if (rl->rlim_max > linux32_maxvmem)
1028 rl->rlim_max = linux32_maxvmem;
1029 }
1030 break;
1031 }
1032}
1033
1034struct sysentvec elf_linux_sysvec = {
1035 .sv_size = LINUX_SYS_MAXSYSCALL,
1036 .sv_table = linux_sysent,
1037 .sv_mask = 0,
1038 .sv_sigsize = LINUX_SIGTBLSZ,
1039 .sv_sigtbl = bsd_to_linux_signal,
1040 .sv_errsize = ELAST + 1,
1041 .sv_errtbl = bsd_to_linux_errno,
1042 .sv_transtrap = translate_traps,
1043 .sv_fixup = elf_linux_fixup,
1044 .sv_sendsig = linux_sendsig,
1045 .sv_sigcode = &_binary_linux32_locore_o_start,
1046 .sv_szsigcode = &linux_szsigcode,
1047 .sv_prepsyscall = NULL,
1048 .sv_name = "Linux ELF32",
1049 .sv_coredump = elf32_coredump,
1050 .sv_imgact_try = exec_linux_imgact_try,
1051 .sv_minsigstksz = LINUX_MINSIGSTKSZ,
1052 .sv_pagesize = PAGE_SIZE,
1053 .sv_minuser = VM_MIN_ADDRESS,
1054 .sv_maxuser = LINUX32_MAXUSER,
1055 .sv_usrstack = LINUX32_USRSTACK,
1056 .sv_psstrings = LINUX32_PS_STRINGS,
1057 .sv_stackprot = VM_PROT_ALL,
1058 .sv_copyout_strings = linux_copyout_strings,
1059 .sv_setregs = exec_linux_setregs,
1060 .sv_fixlimit = linux32_fixlimit,
1061 .sv_maxssiz = &linux32_maxssiz,
1062 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1063 .sv_set_syscall_retval = cpu_set_syscall_retval,
1064 .sv_fetch_syscall_args = linux32_fetch_syscall_args,
1065 .sv_syscallnames = NULL,
1066 .sv_shared_page_base = LINUX32_SHAREDPAGE,
1067 .sv_shared_page_len = PAGE_SIZE,
1068 .sv_schedtail = linux_schedtail,
1069 .sv_thread_detach = linux_thread_detach,
1070};
1071
1072static void
1073linux_vdso_install(void *param)
1074{
1075
1076 linux_szsigcode = (&_binary_linux32_locore_o_end -
1077 &_binary_linux32_locore_o_start);
1078
1079 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
1080 panic("Linux invalid vdso size\n");
1081
1082 __elfN(linux_vdso_fixup)(&elf_linux_sysvec);
1083
1084 linux_shared_page_obj = __elfN(linux_shared_page_init)
1085 (&linux_shared_page_mapping);
1086
1087 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);
1088
1089 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
1090 linux_szsigcode);
1091 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
1092
1093 linux_kplatform = linux_shared_page_mapping +
1094 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
1095}
1096SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
1097 (sysinit_cfunc_t)linux_vdso_install, NULL);
1098
1099static void
1100linux_vdso_deinstall(void *param)
1101{
1102
1103 __elfN(linux_shared_page_fini)(linux_shared_page_obj);
1104};
1105SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
1106 (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
1107
/* Vendor name used by linux32_brandnote. */
static char GNU_ABI_VENDOR[] = "GNU";
/* Value linux32_trans_osrel() requires in the first descriptor word. */
static int GNULINUX_ABI_DESC = 0;
1110
1111static boolean_t
1112linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1113{
1114 const Elf32_Word *desc;
1115 uintptr_t p;
1116
1117 p = (uintptr_t)(note + 1);
1118 p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1119
1120 desc = (const Elf32_Word *)p;
1121 if (desc[0] != GNULINUX_ABI_DESC)
1122 return (FALSE);
1123
1124 /*
1125 * For linux we encode osrel as follows (see linux_mib.c):
1126 * VVVMMMIII (version, major, minor), see linux_mib.c.
1127 */
1128 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1129
1130 return (TRUE);
1131}
1132
1133static Elf_Brandnote linux32_brandnote = {
1134 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
1135 .hdr.n_descsz = 16, /* XXX at least 16 */
1136 .hdr.n_type = 1,
1137 .vendor = GNU_ABI_VENDOR,
1138 .flags = BN_TRANSLATE_OSREL,
1139 .trans_osrel = linux32_trans_osrel
1140};
1141
1142static Elf32_Brandinfo linux_brand = {
1143 .brand = ELFOSABI_LINUX,
1144 .machine = EM_386,
1145 .compat_3_brand = "Linux",
1146 .emul_path = "/compat/linux",
1147 .interp_path = "/lib/ld-linux.so.1",
1148 .sysvec = &elf_linux_sysvec,
1149 .interp_newpath = NULL,
1150 .brand_note = &linux32_brandnote,
1151 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1152};
1153
1154static Elf32_Brandinfo linux_glibc2brand = {
1155 .brand = ELFOSABI_LINUX,
1156 .machine = EM_386,
1157 .compat_3_brand = "Linux",
1158 .emul_path = "/compat/linux",
1159 .interp_path = "/lib/ld-linux.so.2",
1160 .sysvec = &elf_linux_sysvec,
1161 .interp_newpath = NULL,
1162 .brand_note = &linux32_brandnote,
1163 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1164};
1165
1166Elf32_Brandinfo *linux_brandlist[] = {
1167 &linux_brand,
1168 &linux_glibc2brand,
1169 NULL
1170};
1171
1172static int
1173linux_elf_modevent(module_t mod, int type, void *data)
1174{
1175 Elf32_Brandinfo **brandinfo;
1176 int error;
1177 struct linux_ioctl_handler **lihp;
1178
1179 error = 0;
1180
1181 switch(type) {
1182 case MOD_LOAD:
1183 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1184 ++brandinfo)
1185 if (elf32_insert_brand_entry(*brandinfo) < 0)
1186 error = EINVAL;
1187 if (error == 0) {
1188 SET_FOREACH(lihp, linux_ioctl_handler_set)
1189 linux_ioctl_register_handler(*lihp);
1190 LIST_INIT(&futex_list);
1191 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1192 stclohz = (stathz ? stathz : hz);
1193 if (bootverbose)
1194 printf("Linux ELF exec handler installed\n");
1195 } else
1196 printf("cannot insert Linux ELF brand handler\n");
1197 break;
1198 case MOD_UNLOAD:
1199 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1200 ++brandinfo)
1201 if (elf32_brand_inuse(*brandinfo))
1202 error = EBUSY;
1203 if (error == 0) {
1204 for (brandinfo = &linux_brandlist[0];
1205 *brandinfo != NULL; ++brandinfo)
1206 if (elf32_remove_brand_entry(*brandinfo) < 0)
1207 error = EINVAL;
1208 }
1209 if (error == 0) {
1210 SET_FOREACH(lihp, linux_ioctl_handler_set)
1211 linux_ioctl_unregister_handler(*lihp);
1212 mtx_destroy(&futex_mtx);
1213 if (bootverbose)
1214 printf("Linux ELF exec handler removed\n");
1215 } else
1216 printf("Could not deinstall ELF interpreter entry\n");
1217 break;
1218 default:
1219 return (EOPNOTSUPP);
1220 }
1221 return (error);
1222}
1223
1224static moduledata_t linux_elf_mod = {
1225 "linuxelf",
1226 linux_elf_modevent,
1227 0
1228};
1229
1230DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1231MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1);