1/*
2 * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*
18 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
19 * filter breakage during development. *Do not* use this in production,
20 * as it relies on making library calls that are unsafe in signal context.
21 *
 * Instead, on live systems auditctl(8) may be used to monitor failures.
23 * E.g.
24 *   auditctl -a task,always -F uid=<privsep uid>
25 */
26/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27
28/* XXX it should be possible to do logging via the log socket safely */
29
30#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31/* Use the kernel headers in case of an older toolchain. */
32# include <asm/siginfo.h>
33# define __have_siginfo_t 1
34# define __have_sigval_t 1
35# define __have_sigevent_t 1
36#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37
38#include "includes.h"
39
40#ifdef SANDBOX_SECCOMP_FILTER
41
42#include <sys/types.h>
43#include <sys/resource.h>
44#include <sys/prctl.h>
45
46#include <linux/net.h>
47#include <linux/audit.h>
48#include <linux/filter.h>
49#include <linux/seccomp.h>
50#include <elf.h>
51
52#include <asm/unistd.h>
53#ifdef __s390__
54#include <asm/zcrypt.h>
55#endif
56
57#include <errno.h>
58#include <signal.h>
59#include <stdarg.h>
60#include <stddef.h>  /* for offsetof */
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <unistd.h>
65
66#include "log.h"
67#include "ssh-sandbox.h"
68#include "xmalloc.h"
69
/*
 * Linux seccomp_filter sandbox.
 *
 * SECCOMP_FILTER_FAIL is the filter return action used wherever a rule
 * must reject a syscall outright.  It is SECCOMP_RET_KILL unless
 * SANDBOX_SECCOMP_FILTER_DEBUG is defined, in which case it becomes
 * SECCOMP_RET_TRAP so that a signal handler can emit the violation.
 */
#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
#else
# define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
78
/*
 * Byte offsets of the low and high 32-bit words inside one
 * seccomp_data.args[] entry.  The filter compares an argument one
 * 32-bit word at a time, and which word comes first depends on the
 * byte order of the build target.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define ARG_LO_OFFSET  0
# define ARG_HI_OFFSET  sizeof(uint32_t)
#elif __BYTE_ORDER == __BIG_ENDIAN
# define ARG_LO_OFFSET  sizeof(uint32_t)
# define ARG_HI_OFFSET  0
#else
#error "Unknown endianness"
#endif

/*
 * Rule-building helpers.  They trade a larger BPF program for fewer
 * hand-computed jump offsets.  Each helper expects the syscall number to
 * be in the accumulator and, when the rule does not match, falls through
 * to the next rule with the syscall number still in the accumulator.
 */

/* Make syscall _nr fail with errno _errno. */
#define SC_DENY(_nr, _errno) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))

/* Permit syscall _nr regardless of its arguments. */
#define SC_ALLOW(_nr) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)

/*
 * Permit syscall _nr only when argument number _arg_nr is exactly
 * _arg_val.  Both 32-bit halves of the argument must match.
 */
#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), \
	/* low word of the selected argument; mismatch skips to the reload */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    ((_arg_val) & 0xFFFFFFFF), 0, 3), \
	/* high word of the selected argument; mismatch skips to the reload */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* the loads above clobbered the accumulator: restore the syscall nr */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, nr))
112
113/* Syscall filtering set for preauth. */
114static const struct sock_filter preauth_insns[] = {
115	/* Ensure the syscall arch convention is as expected. */
116	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
117		offsetof(struct seccomp_data, arch)),
118	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
119	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
120	/* Load the syscall number for checking. */
121	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
122		offsetof(struct seccomp_data, nr)),
123
124	/* Syscalls to non-fatally deny */
125#ifdef __NR_lstat
126	SC_DENY(__NR_lstat, EACCES),
127#endif
128#ifdef __NR_lstat64
129	SC_DENY(__NR_lstat64, EACCES),
130#endif
131#ifdef __NR_fstat
132	SC_DENY(__NR_fstat, EACCES),
133#endif
134#ifdef __NR_fstat64
135	SC_DENY(__NR_fstat64, EACCES),
136#endif
137#ifdef __NR_open
138	SC_DENY(__NR_open, EACCES),
139#endif
140#ifdef __NR_openat
141	SC_DENY(__NR_openat, EACCES),
142#endif
143#ifdef __NR_newfstatat
144	SC_DENY(__NR_newfstatat, EACCES),
145#endif
146#ifdef __NR_stat
147	SC_DENY(__NR_stat, EACCES),
148#endif
149#ifdef __NR_stat64
150	SC_DENY(__NR_stat64, EACCES),
151#endif
152
153	/* Syscalls to permit */
154#ifdef __NR_brk
155	SC_ALLOW(__NR_brk),
156#endif
157#ifdef __NR_clock_gettime
158	SC_ALLOW(__NR_clock_gettime),
159#endif
160#ifdef __NR_close
161	SC_ALLOW(__NR_close),
162#endif
163#ifdef __NR_exit
164	SC_ALLOW(__NR_exit),
165#endif
166#ifdef __NR_exit_group
167	SC_ALLOW(__NR_exit_group),
168#endif
169#ifdef __NR_futex
170	SC_ALLOW(__NR_futex),
171#endif
172#ifdef __NR_geteuid
173	SC_ALLOW(__NR_geteuid),
174#endif
175#ifdef __NR_geteuid32
176	SC_ALLOW(__NR_geteuid32),
177#endif
178#ifdef __NR_getpgid
179	SC_ALLOW(__NR_getpgid),
180#endif
181#ifdef __NR_getpid
182	SC_ALLOW(__NR_getpid),
183#endif
184#ifdef __NR_getrandom
185	SC_ALLOW(__NR_getrandom),
186#endif
187#ifdef __NR_gettimeofday
188	SC_ALLOW(__NR_gettimeofday),
189#endif
190#ifdef __NR_getuid
191	SC_ALLOW(__NR_getuid),
192#endif
193#ifdef __NR_getuid32
194	SC_ALLOW(__NR_getuid32),
195#endif
196#ifdef __NR_madvise
197	SC_ALLOW(__NR_madvise),
198#endif
199#ifdef __NR_mmap
200	SC_ALLOW(__NR_mmap),
201#endif
202#ifdef __NR_mmap2
203	SC_ALLOW(__NR_mmap2),
204#endif
205#ifdef __NR_mremap
206	SC_ALLOW(__NR_mremap),
207#endif
208#ifdef __NR_munmap
209	SC_ALLOW(__NR_munmap),
210#endif
211#ifdef __NR_nanosleep
212	SC_ALLOW(__NR_nanosleep),
213#endif
214#ifdef __NR__newselect
215	SC_ALLOW(__NR__newselect),
216#endif
217#ifdef __NR_poll
218	SC_ALLOW(__NR_poll),
219#endif
220#ifdef __NR_pselect6
221	SC_ALLOW(__NR_pselect6),
222#endif
223#ifdef __NR_read
224	SC_ALLOW(__NR_read),
225#endif
226#ifdef __NR_rt_sigprocmask
227	SC_ALLOW(__NR_rt_sigprocmask),
228#endif
229#ifdef __NR_select
230	SC_ALLOW(__NR_select),
231#endif
232#ifdef __NR_shutdown
233	SC_ALLOW(__NR_shutdown),
234#endif
235#ifdef __NR_sigprocmask
236	SC_ALLOW(__NR_sigprocmask),
237#endif
238#ifdef __NR_time
239	SC_ALLOW(__NR_time),
240#endif
241#ifdef __NR_write
242	SC_ALLOW(__NR_write),
243#endif
244#ifdef __NR_socketcall
245	SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN),
246	SC_DENY(__NR_socketcall, EACCES),
247#endif
248#if defined(__NR_ioctl) && defined(__s390__)
249	/* Allow ioctls for ICA crypto card on s390 */
250	SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK),
251	SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO),
252	SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT),
253#endif
254#if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
255	/*
256	 * On Linux x32, the clock_gettime VDSO falls back to the
257	 * x86-64 syscall under some circumstances, e.g.
258	 * https://bugs.debian.org/849923
259	 */
260	SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
261#endif
262
263	/* Default deny */
264	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
265};
266
267static const struct sock_fprog preauth_program = {
268	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
269	.filter = (struct sock_filter *)preauth_insns,
270};
271
/* Sandbox state object returned by ssh_sandbox_init(). */
struct ssh_sandbox {
	/* Zero until ssh_sandbox_parent_preauth() records the child's pid. */
	pid_t child_pid;
};
275
276struct ssh_sandbox *
277ssh_sandbox_init(struct monitor *monitor)
278{
279	struct ssh_sandbox *box;
280
281	/*
282	 * Strictly, we don't need to maintain any state here but we need
283	 * to return non-NULL to satisfy the API.
284	 */
285	debug3("%s: preparing seccomp filter sandbox", __func__);
286	box = xcalloc(1, sizeof(*box));
287	box->child_pid = 0;
288
289	return box;
290}
291
292#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
293extern struct monitor *pmonitor;
294void mm_log_handler(LogLevel level, const char *msg, void *ctx);
295
296static void
297ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
298{
299	char msg[256];
300
301	snprintf(msg, sizeof(msg),
302	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
303	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
304	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
305	_exit(1);
306}
307
308static void
309ssh_sandbox_child_debugging(void)
310{
311	struct sigaction act;
312	sigset_t mask;
313
314	debug3("%s: installing SIGSYS handler", __func__);
315	memset(&act, 0, sizeof(act));
316	sigemptyset(&mask);
317	sigaddset(&mask, SIGSYS);
318
319	act.sa_sigaction = &ssh_sandbox_violation;
320	act.sa_flags = SA_SIGINFO;
321	if (sigaction(SIGSYS, &act, NULL) == -1)
322		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
323	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
324		fatal("%s: sigprocmask(SIGSYS): %s",
325		      __func__, strerror(errno));
326}
327#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
328
329void
330ssh_sandbox_child(struct ssh_sandbox *box)
331{
332	struct rlimit rl_zero;
333	int nnp_failed = 0;
334
335	/* Set rlimits for completeness if possible. */
336	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
337	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
338		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
339			__func__, strerror(errno));
340	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
341		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
342			__func__, strerror(errno));
343	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
344		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
345			__func__, strerror(errno));
346
347#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
348	ssh_sandbox_child_debugging();
349#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
350
351	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
352	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
353		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
354		      __func__, strerror(errno));
355		nnp_failed = 1;
356	}
357	debug3("%s: attaching seccomp filter program", __func__);
358	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
359		debug("%s: prctl(PR_SET_SECCOMP): %s",
360		      __func__, strerror(errno));
361	else if (nnp_failed)
362		fatal("%s: SECCOMP_MODE_FILTER activated but "
363		    "PR_SET_NO_NEW_PRIVS failed", __func__);
364}
365
/*
 * Release the sandbox state object and log completion.  The caller must
 * not use box after this returns.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	/* Nothing to tear down beyond the state object itself. */
	free(box);
	debug3("%s: finished", __func__);
}
372
373void
374ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
375{
376	box->child_pid = child_pid;
377}
378
379#endif /* SANDBOX_SECCOMP_FILTER */
380