sandbox-seccomp-filter.c revision 323129
1/*
2 * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
16
17/*
18 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
19 * filter breakage during development. *Do not* use this in production,
20 * as it relies on making library calls that are unsafe in signal context.
21 *
 * Instead, on live systems auditctl(8) may be used to monitor failures.
23 * E.g.
24 *   auditctl -a task,always -F uid=<privsep uid>
25 */
26/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27
28/* XXX it should be possible to do logging via the log socket safely */
29
30#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31/* Use the kernel headers in case of an older toolchain. */
32# include <asm/siginfo.h>
33# define __have_siginfo_t 1
34# define __have_sigval_t 1
35# define __have_sigevent_t 1
36#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37
38#include "includes.h"
39
40#ifdef SANDBOX_SECCOMP_FILTER
41
42#include <sys/types.h>
43#include <sys/resource.h>
44#include <sys/prctl.h>
45
46#include <linux/net.h>
47#include <linux/audit.h>
48#include <linux/filter.h>
49#include <linux/seccomp.h>
50#include <elf.h>
51
52#include <asm/unistd.h>
53
54#include <errno.h>
55#include <signal.h>
56#include <stdarg.h>
57#include <stddef.h>  /* for offsetof */
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "log.h"
64#include "ssh-sandbox.h"
65#include "xmalloc.h"
66
/*
 * Linux seccomp_filter sandbox.
 *
 * SECCOMP_FILTER_FAIL is the filter return action used for any system call
 * that is not explicitly handled.  Normal builds kill the offender; debug
 * builds raise a trap instead so that a signal handler can emit the
 * violation.
 */
#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
#else
# define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
75
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 *
 * Every helper expects the syscall number to be in the BPF accumulator on
 * entry and leaves it there when the rule does not match, so rules can be
 * chained one after another.
 */

/* Fail syscall _nr with errno _errno instead of killing the process. */
#define SC_DENY(_nr, _errno) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))

/* Permit syscall _nr unconditionally. */
#define SC_ALLOW(_nr) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)

/*
 * struct seccomp_data stores each syscall argument in a 64-bit slot, but a
 * BPF load only fetches 32 bits.  Which half of the slot holds the low word
 * depends on the byte order, so select the offsets accordingly.
 */
#if defined(WORDS_BIGENDIAN) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# define SC_ARG_LO_OFFSET 4
# define SC_ARG_HI_OFFSET 0
#else
# define SC_ARG_LO_OFFSET 0
# define SC_ARG_HI_OFFSET 4
#endif

/*
 * Permit syscall _nr only when argument number _arg_nr (0-based) equals
 * _arg_val.  Both 32-bit halves of the argument are compared so that the
 * check is correct on big-endian and on 64-bit targets.
 */
#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \
	/* not this syscall: skip the six instructions of this rule */ \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 6), \
	/* load and test syscall argument, low word */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + \
	    SC_ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (unsigned int)((_arg_val) & 0xFFFFFFFF), 0, 3), \
	/* load and test syscall argument, high word */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + \
	    SC_ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (unsigned int)(((unsigned long long)(_arg_val) >> 32) & \
	    0xFFFFFFFF), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
		offsetof(struct seccomp_data, nr))
93
94/* Syscall filtering set for preauth. */
95static const struct sock_filter preauth_insns[] = {
96	/* Ensure the syscall arch convention is as expected. */
97	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
98		offsetof(struct seccomp_data, arch)),
99	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
100	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
101	/* Load the syscall number for checking. */
102	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
103		offsetof(struct seccomp_data, nr)),
104
105	/* Syscalls to non-fatally deny */
106#ifdef __NR_lstat
107	SC_DENY(lstat, EACCES),
108#endif
109#ifdef __NR_lstat64
110	SC_DENY(lstat64, EACCES),
111#endif
112#ifdef __NR_fstat
113	SC_DENY(fstat, EACCES),
114#endif
115#ifdef __NR_fstat64
116	SC_DENY(fstat64, EACCES),
117#endif
118#ifdef __NR_open
119	SC_DENY(open, EACCES),
120#endif
121#ifdef __NR_openat
122	SC_DENY(openat, EACCES),
123#endif
124#ifdef __NR_newfstatat
125	SC_DENY(newfstatat, EACCES),
126#endif
127#ifdef __NR_stat
128	SC_DENY(stat, EACCES),
129#endif
130#ifdef __NR_stat64
131	SC_DENY(stat64, EACCES),
132#endif
133
134	/* Syscalls to permit */
135#ifdef __NR_brk
136	SC_ALLOW(brk),
137#endif
138#ifdef __NR_clock_gettime
139	SC_ALLOW(clock_gettime),
140#endif
141#ifdef __NR_close
142	SC_ALLOW(close),
143#endif
144#ifdef __NR_exit
145	SC_ALLOW(exit),
146#endif
147#ifdef __NR_exit_group
148	SC_ALLOW(exit_group),
149#endif
150#ifdef __NR_getpgid
151	SC_ALLOW(getpgid),
152#endif
153#ifdef __NR_getpid
154	SC_ALLOW(getpid),
155#endif
156#ifdef __NR_getrandom
157	SC_ALLOW(getrandom),
158#endif
159#ifdef __NR_gettimeofday
160	SC_ALLOW(gettimeofday),
161#endif
162#ifdef __NR_madvise
163	SC_ALLOW(madvise),
164#endif
165#ifdef __NR_mmap
166	SC_ALLOW(mmap),
167#endif
168#ifdef __NR_mmap2
169	SC_ALLOW(mmap2),
170#endif
171#ifdef __NR_mremap
172	SC_ALLOW(mremap),
173#endif
174#ifdef __NR_munmap
175	SC_ALLOW(munmap),
176#endif
177#ifdef __NR__newselect
178	SC_ALLOW(_newselect),
179#endif
180#ifdef __NR_poll
181	SC_ALLOW(poll),
182#endif
183#ifdef __NR_pselect6
184	SC_ALLOW(pselect6),
185#endif
186#ifdef __NR_read
187	SC_ALLOW(read),
188#endif
189#ifdef __NR_rt_sigprocmask
190	SC_ALLOW(rt_sigprocmask),
191#endif
192#ifdef __NR_select
193	SC_ALLOW(select),
194#endif
195#ifdef __NR_shutdown
196	SC_ALLOW(shutdown),
197#endif
198#ifdef __NR_sigprocmask
199	SC_ALLOW(sigprocmask),
200#endif
201#ifdef __NR_time
202	SC_ALLOW(time),
203#endif
204#ifdef __NR_write
205	SC_ALLOW(write),
206#endif
207#ifdef __NR_socketcall
208	SC_ALLOW_ARG(socketcall, 0, SYS_SHUTDOWN),
209#endif
210
211	/* Default deny */
212	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
213};
214
215static const struct sock_fprog preauth_program = {
216	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
217	.filter = (struct sock_filter *)preauth_insns,
218};
219
/* Per-sandbox state kept by the parent. */
struct ssh_sandbox {
	/* 0 until ssh_sandbox_parent_preauth() records the child's pid */
	pid_t child_pid;
};
223
224struct ssh_sandbox *
225ssh_sandbox_init(struct monitor *monitor)
226{
227	struct ssh_sandbox *box;
228
229	/*
230	 * Strictly, we don't need to maintain any state here but we need
231	 * to return non-NULL to satisfy the API.
232	 */
233	debug3("%s: preparing seccomp filter sandbox", __func__);
234	box = xcalloc(1, sizeof(*box));
235	box->child_pid = 0;
236
237	return box;
238}
239
240#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
241extern struct monitor *pmonitor;
242void mm_log_handler(LogLevel level, const char *msg, void *ctx);
243
244static void
245ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
246{
247	char msg[256];
248
249	snprintf(msg, sizeof(msg),
250	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
251	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
252	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
253	_exit(1);
254}
255
256static void
257ssh_sandbox_child_debugging(void)
258{
259	struct sigaction act;
260	sigset_t mask;
261
262	debug3("%s: installing SIGSYS handler", __func__);
263	memset(&act, 0, sizeof(act));
264	sigemptyset(&mask);
265	sigaddset(&mask, SIGSYS);
266
267	act.sa_sigaction = &ssh_sandbox_violation;
268	act.sa_flags = SA_SIGINFO;
269	if (sigaction(SIGSYS, &act, NULL) == -1)
270		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
271	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
272		fatal("%s: sigprocmask(SIGSYS): %s",
273		      __func__, strerror(errno));
274}
275#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
276
277void
278ssh_sandbox_child(struct ssh_sandbox *box)
279{
280	struct rlimit rl_zero;
281	int nnp_failed = 0;
282
283	/* Set rlimits for completeness if possible. */
284	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
285	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
286		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
287			__func__, strerror(errno));
288	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
289		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
290			__func__, strerror(errno));
291	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
292		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
293			__func__, strerror(errno));
294
295#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
296	ssh_sandbox_child_debugging();
297#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
298
299	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
300	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
301		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
302		      __func__, strerror(errno));
303		nnp_failed = 1;
304	}
305	debug3("%s: attaching seccomp filter program", __func__);
306	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
307		debug("%s: prctl(PR_SET_SECCOMP): %s",
308		      __func__, strerror(errno));
309	else if (nnp_failed)
310		fatal("%s: SECCOMP_MODE_FILTER activated but "
311		    "PR_SET_NO_NEW_PRIVS failed", __func__);
312}
313
/*
 * Parent-side teardown: release the sandbox handle.  The pointer must not
 * be used by the caller afterwards.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	free(box);

	debug3("%s: finished", __func__);
}
320
321void
322ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
323{
324	box->child_pid = child_pid;
325}
326
327#endif /* SANDBOX_SECCOMP_FILTER */
328