/*
 * Copyright 2017, Data61
 * Commonwealth Scientific and Industrial Research Organisation (CSIRO)
 * ABN 41 687 119 230.
 *
 * This software may be distributed and modified according to the terms of
 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
 * See "LICENSE_BSD2.txt" for details.
 *
 * @TAG(DATA61_BSD)
 */
12
13#include <autoconf.h>
14#include <sel4muslcsys/gen_config.h>
15#include <stdio.h>
16#include <stdint.h>
17#include <stddef.h>
18#include <sel4/sel4.h>
19#include <sel4runtime.h>
20#include <utils/util.h>
21#include <bits/syscall.h>
22#include <bits/errno.h>
23#include <sys/uio.h>
24#include <muslcsys/vsyscall.h>
25#include "syscalls.h"
26#ifdef CONFIG_LIB_SEL4_MUSLC_SYS_CPIO_FS
27#include <cpio/cpio.h>
28#endif
29
30#define MUSLC_NUM_SYSCALLS (MUSLC_HIGHEST_SYSCALL + 1)
31
/* Each TLS-related syscall may be invoked once during startup, before the
 * syscall table has been initialized with the regular implementations. The
 * boot handlers record that single occurrence (and its argument) here; a
 * repeated call is reported as an error by the handler. */
static bool boot_set_tid_address_happened = false;
static int *boot_set_tid_address_arg = NULL;
37
38#if defined(__NR_set_thread_area) || defined(__ARM_NR_set_tls)
/* Startup record for the set_thread_area / set_tls syscall: whether the boot
 * handler has run, and the thread pointer it was handed. */
static bool boot_set_thread_area_happened = false;
static void *boot_set_thread_area_arg = NULL;
41
42static long boot_set_thread_area(va_list ap)
43{
44    void *tp = va_arg(ap, void *);
45    if (boot_set_thread_area_happened) {
46        ZF_LOGE("Boot version of set_thread_area somehow got called twice");
47        return -ESRCH;
48    }
49
50    /* For platforms (such as aarch64) that have an architecturally defined thread pointer
51     * that is user accessible set_thread_area will happen internally in the C library.
52     * Unfortunately in x86-64 the method to *set* the thread pointer is not defined completely
53     * by the architecture, but the kernel *may* be using a strategy that allows us to
54     * write to it directly, but the C library will not know this. In this case we set the
55     * thread pointer here and then return, otherwise we have to resort to using the TCB invocation */
56#if defined(CONFIG_FSGSBASE_INST)
57    asm volatile("wrfsbase %0" :: "r"(tp));
58#else
59    char *tcb_string = getenv("boot_tcb_cptr");
60    if (tcb_string) {
61        seL4_CPtr tcb;
62        if (sscanf(tcb_string, "%p", (void **)&tcb) == 1) {
63            seL4_TCB_SetTLSBase(tcb, (seL4_Word)tp);
64        }
65    }
66#endif
67    boot_set_thread_area_happened = true;
68    boot_set_thread_area_arg = tp;
69    return 0;
70}
71
72bool muslcsys_get_boot_set_thread_area(void **arg)
73{
74    *arg = boot_set_thread_area_arg;
75    return boot_set_thread_area_happened;
76}
77#endif
78
79static long boot_set_tid_address(va_list ap)
80{
81    int *tid = va_arg(ap, int *);
82    if (boot_set_tid_address_happened) {
83        ZF_LOGE("Boot version of set_tid_address somehow got called twice");
84        return 1;
85    }
86    boot_set_tid_address_happened = true;
87    boot_set_tid_address_arg = tid;
88    return 1;
89}
90
91bool muslcsys_get_boot_set_tid_address(int **arg)
92{
93    *arg = boot_set_tid_address_arg;
94    return boot_set_tid_address_happened;
95}
96
/* Basic sys_writev for use during booting that will only use seL4_DebugPutChar.
 *
 * Arguments, read from the va_list in this order:
 *   int fildes         - ignored
 *   struct iovec *iov  - array of buffers to emit
 *   int iovcnt         - number of entries in iov
 *
 * Returns the total number of bytes described by the iovec array. The bytes
 * are only actually emitted when CONFIG_PRINTING is defined; otherwise they
 * are just counted. */
long boot_sys_writev(va_list ap)
{
    (void)va_arg(ap, int); /* fildes is not used */
    const struct iovec *iov = va_arg(ap, struct iovec *);
    int iovcnt = va_arg(ap, int);

    ssize_t total = 0;

    for (int i = 0; i < iovcnt; i++) {
#ifdef CONFIG_PRINTING
        const char *base = (const char *)iov[i].iov_base;
        /* iov_len is a size_t, so index with a size_t: an int counter would
         * be compared as unsigned and overflow for very large buffers. */
        for (size_t j = 0; j < iov[i].iov_len; j++) {
            seL4_DebugPutChar(base[j]);
        }
#endif
        total += (ssize_t)iov[i].iov_len;
    }

    return total;
}
118
119static muslcsys_syscall_t syscall_table[MUSLC_NUM_SYSCALLS] = {
120#ifdef __NR_set_thread_area
121    [__NR_set_thread_area] = boot_set_thread_area,
122#endif
123    [__NR_set_tid_address] = boot_set_tid_address,
124    [__NR_writev] = boot_sys_writev,
125    /* We don't need a boot_sys_write variant as this implementation wraps
126     * whatever __NR_writev is set to. */
127    [__NR_write] = sys_write,
128    [__NR_sched_yield] = sys_sched_yield,
129    [__NR_exit] = sys_exit,
130    [__NR_rt_sigprocmask] = sys_rt_sigprocmask,
131    [__NR_gettid] = sys_gettid,
132    [__NR_getpid] = sys_getpid,
133    [__NR_tgkill] = sys_tgkill,
134    [__NR_tkill] = sys_tkill,
135    [__NR_exit_group] = sys_exit_group,
136#ifdef __NR_open
137    [__NR_open] = sys_open,
138#endif
139#ifdef __NR_openat
140    [__NR_openat] = sys_openat,
141#endif
142    [__NR_close] = sys_close,
143    [__NR_readv] = sys_readv,
144    [__NR_read] = sys_read,
145    [__NR_ioctl] = sys_ioctl,
146    [__NR_prlimit64] = sys_prlimit64,
147    [__NR_lseek] = sys_lseek,
148#ifdef __NR__llseek
149    [__NR__llseek] = sys__llseek,
150#endif
151#ifdef __NR_access
152    [__NR_access] = sys_access,
153#endif
154    [__NR_brk] = sys_brk,
155#ifdef __NR_mmap2
156    [__NR_mmap2] = sys_mmap2,
157#endif
158#ifdef __NR_mmap
159    [__NR_mmap] = sys_mmap,
160#endif
161    [__NR_mremap] = sys_mremap,
162    [__NR_madvise] = sys_madvise,
163};
164
165/* Additional syscall lookup table for handling spare syscalls or syscalls that have large
166 * numbers. Currently The number of these is very small and so it's an unordered list that
167 * must be searched to find a syscall */
168typedef struct sparse_syscall {
169    int sysnum;
170    muslcsys_syscall_t syscall;
171} sparse_syscall_t;
172
173static sparse_syscall_t sparse_syscall_table[] = {
174#ifdef __ARM_NR_breakpoint
175    {__ARM_NR_breakpoint, NULL},
176#endif
177#ifdef __ARM_NR_cacheflush
178    {__ARM_NR_cacheflush, NULL},
179#endif
180#ifdef __ARM_NR_usr26
181    {__ARM_NR_usr26, NULL},
182#endif
183#ifdef __ARM_NR_usr32
184    {__ARM_NR_usr32, NULL},
185#endif
186#ifdef __ARM_NR_set_tls
187    {__ARM_NR_set_tls, boot_set_thread_area},
188#endif
189};
190
191static int find_sparse_syscall(int syscall)
192{
193    for (int i = 0; i < ARRAY_SIZE(sparse_syscall_table); i++) {
194        if (sparse_syscall_table[i].sysnum == syscall) {
195            return i;
196        }
197    }
198    return -1;
199}
200
201muslcsys_syscall_t muslcsys_install_syscall(int syscall, muslcsys_syscall_t new_syscall)
202{
203    muslcsys_syscall_t ret;
204    if (syscall >= ARRAY_SIZE(syscall_table)) {
205        int index = find_sparse_syscall(syscall);
206        if (index < 0) {
207            ZF_LOGF("Syscall %d exceeds syscall table size of %zu and not found in sparse table", syscall,
208                    ARRAY_SIZE(syscall_table));
209        }
210        ret = sparse_syscall_table[index].syscall;
211        sparse_syscall_table[index].syscall = ret;
212    } else {
213        ret = syscall_table[syscall];
214        syscall_table[syscall] = new_syscall;
215    }
216    return ret;
217}
218
219/* Switch the thread syscalls from their boot variant to their regular
220 * default implementation. We do this at the lowest priority so that
221 * it can be overriden. We are able to have this constructor
222 * in this file since we know it will get looked at by the linker due
223 * to __vsyscall_ptr being here */
224static void CONSTRUCTOR(CONSTRUCTOR_MIN_PRIORITY) init_syscall_table(void)
225{
226    muslcsys_syscall_t ret UNUSED;
227    ret = muslcsys_install_syscall(__NR_set_tid_address, sys_set_tid_address);
228    assert(ret == boot_set_tid_address);
229#ifdef __NR_set_thread_area
230    ret = muslcsys_install_syscall(__NR_set_thread_area, sys_set_thread_area);
231    assert(ret == boot_set_thread_area);
232#endif
233#ifdef __ARM_NR_set_tls
234    ret = muslcsys_install_syscall(__ARM_NR_set_tls, NULL);
235    assert(ret == boot_set_thread_area);
236#endif
237    ret = muslcsys_install_syscall(__NR_writev, sys_writev);
238    assert(ret == boot_sys_writev);
239}
240
/* If we have a default CPIO file interface defined in the config then install it here */
#ifdef CONFIG_LIB_SEL4_MUSLC_SYS_CPIO_FS
extern char _cpio_archive[];
extern char _cpio_archive_end[];

/* Constructor (lowest priority): registers the archive delimited by
 * _cpio_archive and _cpio_archive_end, together with cpio_get_file, via
 * muslcsys_install_cpio_interface(). */
static void CONSTRUCTOR(CONSTRUCTOR_MIN_PRIORITY) install_default_cpio(void)
{
    muslcsys_install_cpio_interface(_cpio_archive,
                                    (unsigned long)(_cpio_archive_end - _cpio_archive),
                                    cpio_get_file);
}
#endif
251
/* Report a failed syscall attempt.
 *
 * With CONFIG_PRINTING the message is formatted into a local buffer and
 * emitted one character at a time with seL4_DebugPutChar; without it this
 * function does nothing. */
static void debug_error(int sysnum)
{
#ifdef CONFIG_PRINTING
    char buf[100];

    /* Bounded formatting: the message can never overrun buf */
    snprintf(buf, sizeof(buf), "libsel4muslcsys: Error attempting syscall %d\n", sysnum);
    for (const char *c = buf; *c != '\0'; c++) {
        seL4_DebugPutChar(*c);
    }
#else
    (void)sysnum;
#endif
}
267
268long sel4_vsyscall(long sysnum, ...)
269{
270    va_list al;
271    va_start(al, sysnum);
272    muslcsys_syscall_t syscall;
273    if (sysnum < 0 || sysnum >= ARRAY_SIZE(syscall_table)) {
274        int index = find_sparse_syscall(sysnum);
275        if (index < 0) {
276            debug_error(sysnum);
277            return -ENOSYS;
278        }
279        syscall = sparse_syscall_table[index].syscall;
280    } else {
281        syscall = syscall_table[sysnum];
282    }
283    /* Check a syscall is implemented there */
284    if (!syscall) {
285        debug_error(sysnum);
286        return -ENOSYS;
287    }
288    /* Call it */
289    long ret = syscall(al);
290    va_end(al);
291    return ret;
292}
293
294extern void *__sysinfo;
295
296/* Set the virtual syscall handler so that a portion of muslc will
297 * function.
298 *
299 * This is required for apps using a dynamic heap, which need to make
300 * use of malloc in order to provide an implementation of brk and mmap
301 * that are used during the initialisation of muslc.
302 */
303static void CONSTRUCTOR(MUSLCSYS_WITH_VSYSCALL_PRIORITY - 1) init_vsyscall(void)
304{
305    __sysinfo = sel4_vsyscall;
306}
307
308/* Put a pointer to sel4_vsyscall in a special section so anyone loading us
309 * knows how to configure our syscall table */
310uintptr_t VISIBLE SECTION("__vsyscall") __vsyscall_ptr = (uintptr_t) sel4_vsyscall;
311
312/* muslc provides a function used to initialise the C standard library
313 * environment. */
314extern void __init_libc(char const *const *envp, char const *pn);
315
316/* This is needed to force GCC to re-read the TLS base address on some
317 * platforms when setting the IPC buffer address after it has changed.
318 *
319 * At higher optimisation levels on aarch64, GCC will read the location
320 * for `__sel4_ipc_buffer` only once in the same function, even across
321 * function calls, and thus will not update any newly created TLS region
322 * with the IPC buffer address.
323 */
324static void NO_INLINE update_ipc_buffer(seL4_IPCBuffer *tmp)
325{
326    __sel4_ipc_buffer = tmp;
327}
328
329/* Initialise muslc environment */
330void CONSTRUCTOR(CONFIG_LIB_SEL4_MUSLC_SYS_CONSTRUCTOR_PRIORITY) muslcsys_init_muslc(void)
331{
332    seL4_IPCBuffer *tmp = __sel4_ipc_buffer;
333    __init_libc(sel4runtime_envp(), sel4runtime_argv()[0]);
334    update_ipc_buffer(tmp);
335}
336