1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
 * Copyright (c) 1994-1996 Søren Schmidt
5 * Copyright (c) 2018 Turing Robotic Industries Inc.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD$");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/cdefs.h>
35#include <sys/elf.h>
36#include <sys/exec.h>
37#include <sys/imgact.h>
38#include <sys/imgact_elf.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/module.h>
42#include <sys/mutex.h>
43#include <sys/proc.h>
44#include <sys/signalvar.h>
45#include <sys/sysctl.h>
46#include <sys/sysent.h>
47
48#include <vm/vm_param.h>
49
50#include <arm64/linux/linux.h>
51#include <arm64/linux/linux_proto.h>
52#include <compat/linux/linux_dtrace.h>
53#include <compat/linux/linux_emul.h>
54#include <compat/linux/linux_ioctl.h>
55#include <compat/linux/linux_mib.h>
56#include <compat/linux/linux_misc.h>
57#include <compat/linux/linux_util.h>
58#include <compat/linux/linux_vdso.h>
59
MODULE_VERSION(linux64elf, 1);

/* Platform name string; assigned in linux_vdso_install(). */
const char *linux_kplatform;
/* Size in bytes of the embedded linux_locore.o image (end - start). */
static int linux_szsigcode;
/* Shared page object and mapping, set up in linux_vdso_install(). */
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
/* Start and end symbols of the embedded linux_locore.o image. */
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;

/* Linux system call table; installed as sv_table in elf_linux_sysvec. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* ioctl handlers (un)registered by linux64_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations of the file-local sysentvec and brand callbacks. */
static register_t *linux_copyout_strings(struct image_params *imgp);
static int	linux_elf_fixup(register_t **stack_base,
		    struct image_params *iparams);
static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(const void *param);
static void	linux_vdso_deinstall(const void *param);
static void	linux_set_syscall_retval(struct thread *td, int error);
static int	linux_fetch_syscall_args(struct thread *td);
static void	linux_exec_setregs(struct thread *td, struct image_params *imgp,
		    u_long stack);
static int	linux_vsyscall(struct thread *td);

/* DTrace init */
LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);

/*
 * DTrace probes.  Each "todo" probe marks a function in this file that is
 * still unimplemented or unvalidated.
 */
LIN_SDT_PROBE_DEFINE2(sysvec, linux_translate_traps, todo, "int", "int");
LIN_SDT_PROBE_DEFINE0(sysvec, linux_exec_setregs, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_elf_fixup, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sigreturn, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_rt_sendsig, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_vsyscall, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_install, todo);
LIN_SDT_PROBE_DEFINE0(sysvec, linux_vdso_deinstall, todo);
97
98/* LINUXTODO: do we have traps to translate? */
99static int
100linux_translate_traps(int signal, int trap_code)
101{
102
103	LIN_SDT_PROBE2(sysvec, linux_translate_traps, todo, signal, trap_code);
104	return (signal);
105}
106
/*
 * linux_platform symbol.  In the code visible in this file it is referenced
 * only from the disabled (#if 0) LINUX_AT_PLATFORM and linux_kplatform paths.
 */
LINUX_VDSO_SYM_CHAR(linux_platform);
108
109static int
110linux_fetch_syscall_args(struct thread *td)
111{
112	struct proc *p;
113	struct syscall_args *sa;
114	register_t *ap;
115
116	p = td->td_proc;
117	ap = td->td_frame->tf_x;
118	sa = &td->td_sa;
119
120	sa->code = td->td_frame->tf_x[8];
121	/* LINUXTODO: generic syscall? */
122	if (p->p_sysent->sv_mask)
123		sa->code &= p->p_sysent->sv_mask;
124	if (sa->code >= p->p_sysent->sv_size)
125		sa->callp = &p->p_sysent->sv_table[0];
126	else
127		sa->callp = &p->p_sysent->sv_table[sa->code];
128
129	sa->narg = sa->callp->sy_narg;
130	if (sa->narg > 8)
131		panic("ARM64TODO: Could we have more than 8 args?");
132	memcpy(sa->args, ap, 8 * sizeof(register_t));
133
134	td->td_retval[0] = 0;
135	return (0);
136}
137
138static void
139linux_set_syscall_retval(struct thread *td, int error)
140{
141
142	td->td_retval[1] = td->td_frame->tf_x[1];
143	cpu_set_syscall_retval(td, error);
144}
145
146static int
147linux_elf_fixup(register_t **stack_base, struct image_params *imgp)
148{
149	Elf_Auxargs *args;
150	Elf_Auxinfo *argarray, *pos;
151	Elf_Addr *auxbase, *base;
152	struct ps_strings *arginfo;
153	struct proc *p;
154	int error, issetugid;
155
156	LIN_SDT_PROBE0(sysvec, linux_elf_fixup, todo);
157	p = imgp->proc;
158	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
159
160	KASSERT(curthread->td_proc == imgp->proc,
161	    ("unsafe linux_elf_fixup(), should be curproc"));
162	base = (Elf64_Addr *)*stack_base;
163	args = (Elf64_Auxargs *)imgp->auxargs;
164	/* Auxargs after argc, and NULL-terminated argv and envv lists. */
165	auxbase = base + 1 + imgp->args->argc + 1 + imgp->args->envc + 1;
166	argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP,
167	    M_WAITOK | M_ZERO);
168
169	issetugid = p->p_flag & P_SUGID ? 1 : 0;
170	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
171	    imgp->proc->p_sysent->sv_shared_page_base);
172#if 0	/* LINUXTODO: implement arm64 LINUX_AT_HWCAP */
173	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
174#endif
175	AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
176	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
177	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
178	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
179	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
180	AUXARGS_ENTRY(pos, AT_BASE, args->base);
181	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
182	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
183	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
184	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
185	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
186	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
187	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
188#if 0	/* LINUXTODO: implement arm64 LINUX_AT_PLATFORM */
189	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform));
190#endif
191	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
192	if (imgp->execpathp != 0)
193		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
194	if (args->execfd != -1)
195		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
196	AUXARGS_ENTRY(pos, AT_NULL, 0);
197	free(imgp->auxargs, M_TEMP);
198	imgp->auxargs = NULL;
199	KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs"));
200
201	error = copyout(argarray, auxbase, sizeof(*argarray) * LINUX_AT_COUNT);
202	free(argarray, M_TEMP);
203	if (error != 0)
204		return (error);
205
206	return (0);
207}
208
/*
 * Copy strings out to the new process address space, constructing new arg
 * and env vector tables. Return a pointer to the base so that it can be used
 * as the initial stack pointer.
 *
 * Layout written below the ps_strings structure, from high to low addresses:
 * the executable path (only when both execpath and auxargs are set), the
 * canary bytes, the argument/environment strings, room for the auxargs
 * array (only when auxargs is set), and finally argc plus the argv[] and
 * envp[] pointer tables.
 *
 * The return values of copyout() and suword() are not checked here.
 *
 * LINUXTODO: deduplicate against other linuxulator archs
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	char **vectp;
	char *stringp, *destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	char canary[LINUX_AT_RANDOM_LEN];
	size_t execpath_len;
	struct proc *p;
	int argc, envc;

	/* Calculate string base and vector table pointers. */
	if (imgp->execpath != NULL && imgp->auxargs != NULL)
		execpath_len = strlen(imgp->execpath) + 1;
	else
		execpath_len = 0;

	p = imgp->proc;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	/* destp: where the argument and environment strings will start. */
	destp = (caddr_t)arginfo - SPARE_USRSPACE -
	    roundup(sizeof(canary), sizeof(char *)) -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *));

	/* The executable path sits directly below ps_strings. */
	if (execpath_len != 0) {
		imgp->execpathp = (uintptr_t)arginfo - execpath_len;
		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
	}

	/* Prepare the canary for SSP. */
	arc4rand(canary, sizeof(canary), 0);
	imgp->canary = (uintptr_t)arginfo -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(sizeof(canary), sizeof(char *));
	copyout(canary, (void *)imgp->canary, sizeof(canary));

	vectp = (char **)destp;
	if (imgp->auxargs) {
		/*
		 * Allocate room on the stack for the ELF auxargs
		 * array.  It has up to LINUX_AT_COUNT entries.
		 */
		vectp -= howmany(LINUX_AT_COUNT * sizeof(Elf64_Auxinfo),
		    sizeof(*vectp));
	}

	/*
	 * Allocate room for argc and the argv[] and env vectors including the
	 * terminating NULL pointers.
	 */
	vectp -= 1 + imgp->args->argc + 1 + imgp->args->envc + 1;
	vectp = (char **)STACKALIGN(vectp);

	/* vectp also becomes our initial stack base. */
	stack_base = (register_t *)vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;

	/* Copy out strings - arguments and environment. */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 * NOTE(review): ps_argvstr is stored while vectp still points at the
	 * argc slot (argc is written just below), so it records the address
	 * of argc rather than of argv[0] -- confirm this is intended.
	 */
	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nargvstr, argc);

	/* argc occupies the first slot of the vector area. */
	suword(vectp++, argc);
	/* Fill in argument portion of vector table. */
	for (; argc > 0; --argc) {
		suword(vectp++, (long)(intptr_t)destp);
		/* Advance destp past this string, walking the kernel copy. */
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* A null vector table pointer separates the argp's from the envp's. */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nenvstr, envc);

	/* Fill in environment portion of vector table. */
	for (; envc > 0; --envc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* The end of the vector table is a null pointer. */
	suword(vectp, 0);
	return (stack_base);
}
310
311/*
312 * Reset registers to default values on exec.
313 */
314static void
315linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
316{
317	struct trapframe *regs = td->td_frame;
318
319	/* LINUXTODO: validate */
320	LIN_SDT_PROBE0(sysvec, linux_exec_setregs, todo);
321
322	memset(regs, 0, sizeof(*regs));
323	/* glibc start.S registers function pointer in x0 with atexit. */
324        regs->tf_sp = stack;
325#if 0	/* LINUXTODO: See if this is used. */
326	regs->tf_lr = imgp->entry_addr;
327#else
328        regs->tf_lr = 0xffffffffffffffff;
329#endif
330        regs->tf_elr = imgp->entry_addr;
331}
332
/*
 * rt_sigreturn system call handler.  Not implemented yet: it only fires the
 * DTrace probe and fails with EDOOFUS; td and args are unused.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{

	/* LINUXTODO: implement */
	LIN_SDT_PROBE0(sysvec, linux_rt_sigreturn, todo);
	return (EDOOFUS);
}
341
/*
 * Signal delivery hook (sv_sendsig).  Not implemented yet: it only fires the
 * DTrace probe; catcher, ksi and mask are unused.
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{

	/* LINUXTODO: implement */
	LIN_SDT_PROBE0(sysvec, linux_rt_sendsig, todo);
}
349
/*
 * Trap hook (sv_trap).  Not implemented yet: it only fires the DTrace probe
 * and fails with EDOOFUS; td is unused.
 */
static int
linux_vsyscall(struct thread *td)
{

	/* LINUXTODO: implement */
	LIN_SDT_PROBE0(sysvec, linux_vsyscall, todo);
	return (EDOOFUS);
}
358
/*
 * System entry vector for Linux ELF64 processes.  It ties together the
 * syscall table, the callbacks defined in this file and the shared page
 * parameters.  sv_shared_page_obj is not set here; it is assigned at run
 * time by linux_vdso_install().
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= linux_errtbl,
	.sv_transtrap	= linux_translate_traps,
	.sv_fixup	= linux_elf_fixup,
	.sv_sendsig	= linux_rt_sendsig,
	/* Embedded linux_locore.o image; its size is computed at install. */
	.sv_sigcode	= &_binary_linux_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_name	= "Linux ELF64",
	.sv_coredump	= elf64_coredump,
	.sv_imgact_try	= linux_exec_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= VM_MAXUSER_ADDRESS,
	.sv_usrstack	= USRSTACK,
	.sv_psstrings	= PS_STRINGS, /* XXX */
	.sv_stackprot	= VM_PROT_READ | VM_PROT_WRITE,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= linux_exec_setregs,
	.sv_fixlimit	= NULL,
	.sv_maxssiz	= NULL,
	.sv_flags	= SV_ABI_LINUX | SV_LP64 | SV_SHP,
	.sv_set_syscall_retval = linux_set_syscall_retval,
	.sv_fetch_syscall_args = linux_fetch_syscall_args,
	.sv_syscallnames = NULL,
	/* One page; linux_vdso_install() panics if the image is larger. */
	.sv_shared_page_base = SHAREDPAGE,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
	.sv_trap	= linux_vsyscall,
};
393
/*
 * SYSINIT hook: install the embedded linux_locore.o image into the shared
 * page of elf_linux_sysvec.  param is unused.
 *
 * Panics if the image does not fit in sv_shared_page_len.
 */
static void
linux_vdso_install(const void *param)
{

	/* Image size is the distance between the start and end symbols. */
	linux_szsigcode = (&_binary_linux_locore_o_end -
	    &_binary_linux_locore_o_start);

	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
		panic("invalid Linux VDSO size\n");

	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);

	linux_shared_page_obj = __elfN(linux_shared_page_init)
	    (&linux_shared_page_mapping);

	__elfN(linux_vdso_reloc)(&elf_linux_sysvec);

	/* Copy the image into the mapping and publish the object. */
	memcpy(linux_shared_page_mapping, elf_linux_sysvec.sv_sigcode,
	    linux_szsigcode);
	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;

	/* Until the TODO is resolved the platform name is hard-coded. */
	printf("LINUXTODO: %s: fix linux_kplatform\n", __func__);
#if 0
	linux_kplatform = linux_shared_page_mapping +
	    (linux_platform - (caddr_t)elf_linux_sysvec.sv_shared_page_base);
#else
	linux_kplatform = "arm64";
#endif
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
    linux_vdso_install, NULL);
425
/*
 * SYSUNINIT hook: hand the shared page object created by
 * linux_vdso_install() to linux_shared_page_fini.  param is unused.
 */
static void
linux_vdso_deinstall(const void *param)
{

	LIN_SDT_PROBE0(sysvec, linux_vdso_deinstall, todo);
	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
    linux_vdso_deinstall, NULL);
435
436static char GNU_ABI_VENDOR[] = "GNU";
437static int GNU_ABI_LINUX = 0;
438
439/* LINUXTODO: deduplicate */
440static bool
441linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
442{
443	const Elf32_Word *desc;
444	uintptr_t p;
445
446	p = (uintptr_t)(note + 1);
447	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
448
449	desc = (const Elf32_Word *)p;
450	if (desc[0] != GNU_ABI_LINUX)
451		return (false);
452
453	*osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]);
454	return (true);
455}
456
/*
 * Brand note description: vendor "GNU", type 1, 16-byte descriptor.  The
 * descriptor is translated to an OS release by linux_trans_osrel().
 */
static Elf_Brandnote linux64_brandnote = {
	/* sizeof() of the array, so the terminating NUL is counted. */
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= linux_trans_osrel
};
465
466static Elf64_Brandinfo linux_glibc2brand = {
467	.brand		= ELFOSABI_LINUX,
468	.machine	= EM_AARCH64,
469	.compat_3_brand	= "Linux",
470	.emul_path	= linux_emul_path,
471	.interp_path	= "/lib64/ld-linux-x86-64.so.2",
472	.sysvec		= &elf_linux_sysvec,
473	.interp_newpath	= NULL,
474	.brand_note	= &linux64_brandnote,
475	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
476};
477
/*
 * NULL-terminated list of the brands that linux64_elf_modevent() inserts on
 * load and removes on unload.
 */
Elf64_Brandinfo *linux_brandlist[] = {
	&linux_glibc2brand,
	NULL
};
482
483static int
484linux64_elf_modevent(module_t mod, int type, void *data)
485{
486	Elf64_Brandinfo **brandinfo;
487	struct linux_ioctl_handler**lihp;
488	int error;
489
490	error = 0;
491	switch(type) {
492	case MOD_LOAD:
493		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
494		    ++brandinfo)
495			if (elf64_insert_brand_entry(*brandinfo) < 0)
496				error = EINVAL;
497		if (error == 0) {
498			SET_FOREACH(lihp, linux_ioctl_handler_set)
499				linux_ioctl_register_handler(*lihp);
500			stclohz = (stathz ? stathz : hz);
501			if (bootverbose)
502				printf("Linux arm64 ELF exec handler installed\n");
503		}
504		break;
505	case MOD_UNLOAD:
506		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
507		    ++brandinfo)
508			if (elf64_brand_inuse(*brandinfo))
509				error = EBUSY;
510		if (error == 0) {
511			for (brandinfo = &linux_brandlist[0];
512			    *brandinfo != NULL; ++brandinfo)
513				if (elf64_remove_brand_entry(*brandinfo) < 0)
514					error = EINVAL;
515		}
516		if (error == 0) {
517			SET_FOREACH(lihp, linux_ioctl_handler_set)
518				linux_ioctl_unregister_handler(*lihp);
519			if (bootverbose)
520				printf("Linux ELF exec handler removed\n");
521		} else
522			printf("Could not deinstall ELF interpreter entry\n");
523		break;
524	default:
525		return (EOPNOTSUPP);
526	}
527	return (error);
528}
529
/*
 * Module description: module name, event handler, and a third field left
 * as 0.
 */
static moduledata_t linux64_elf_mod = {
	"linux64elf",
	linux64_elf_modevent,
	0
};

/* Register the module and declare its dependency on linux_common. */
DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1);
FEATURE(linux64, "AArch64 Linux 64bit support");
539