1239844Sdes/*
2239844Sdes * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3239844Sdes *
4239844Sdes * Permission to use, copy, modify, and distribute this software for any
5239844Sdes * purpose with or without fee is hereby granted, provided that the above
6239844Sdes * copyright notice and this permission notice appear in all copies.
7239844Sdes *
8239844Sdes * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9239844Sdes * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10239844Sdes * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11239844Sdes * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12239844Sdes * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13239844Sdes * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14239844Sdes * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15239844Sdes */
16239844Sdes
/*
 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
 * filter breakage during development. *Do not* use this in production,
 * as it relies on making library calls that are unsafe in signal context.
 *
 * Instead, on live systems auditctl(8) may be used to monitor failures.
 * E.g.
 *   auditctl -a task,always -F uid=<privsep uid>
 */
26239844Sdes/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27239844Sdes
28295367Sdes/* XXX it should be possible to do logging via the log socket safely */
29295367Sdes
30239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31239844Sdes/* Use the kernel headers in case of an older toolchain. */
32239844Sdes# include <asm/siginfo.h>
33239844Sdes# define __have_siginfo_t 1
34239844Sdes# define __have_sigval_t 1
35239844Sdes# define __have_sigevent_t 1
36239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37239844Sdes
38239844Sdes#include "includes.h"
39239844Sdes
40239844Sdes#ifdef SANDBOX_SECCOMP_FILTER
41239844Sdes
42239844Sdes#include <sys/types.h>
43239844Sdes#include <sys/resource.h>
44239844Sdes#include <sys/prctl.h>
45239844Sdes
46295367Sdes#include <linux/net.h>
47239844Sdes#include <linux/audit.h>
48239844Sdes#include <linux/filter.h>
49239844Sdes#include <linux/seccomp.h>
50248613Sdes#include <elf.h>
51239844Sdes
52239844Sdes#include <asm/unistd.h>
53239844Sdes
54239844Sdes#include <errno.h>
55239844Sdes#include <signal.h>
56239844Sdes#include <stdarg.h>
57239844Sdes#include <stddef.h>  /* for offsetof */
58239844Sdes#include <stdio.h>
59239844Sdes#include <stdlib.h>
60239844Sdes#include <string.h>
61239844Sdes#include <unistd.h>
62239844Sdes
63239844Sdes#include "log.h"
64239844Sdes#include "ssh-sandbox.h"
65239844Sdes#include "xmalloc.h"
66239844Sdes
67239844Sdes/* Linux seccomp_filter sandbox */
68239844Sdes#define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
69239844Sdes
70239844Sdes/* Use a signal handler to emit violations when debugging */
71239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
72239844Sdes# undef SECCOMP_FILTER_FAIL
73239844Sdes# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
74239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
75239844Sdes
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 *
 * Each helper expands to a comma-separated run of struct sock_filter
 * initializers.  Every helper expects the syscall number to be in the BPF
 * accumulator on entry and leaves it there whenever the rule does not
 * return, so rules may be listed back to back.
 */

/*
 * struct seccomp_data holds each syscall argument in a 64-bit slot, while
 * BPF_W loads fetch a single 32-bit word.  Select the byte offsets of the
 * low and high halves inside a slot according to the host byte order so
 * that argument comparisons look at the intended word.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
# define ARG_LO_OFFSET	0
# define ARG_HI_OFFSET	4
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
# define ARG_LO_OFFSET	4
# define ARG_HI_OFFSET	0
#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
    (__BYTE_ORDER == __LITTLE_ENDIAN)
# define ARG_LO_OFFSET	0
# define ARG_HI_OFFSET	4
#elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
    (__BYTE_ORDER == __BIG_ENDIAN)
# define ARG_LO_OFFSET	4
# define ARG_HI_OFFSET	0
#else
# error "Unknown endianness"
#endif

/* Fail syscall _nr with errno _errno instead of killing the process. */
#define SC_DENY(_nr, _errno) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))

/* Permit syscall _nr unconditionally. */
#define SC_ALLOW(_nr) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)

/*
 * Permit syscall _nr only when argument number _arg_nr equals _arg_val.
 * Both 32-bit halves of the 64-bit argument slot are compared.  When the
 * syscall number matches but the argument does not, the syscall number is
 * reloaded and evaluation falls through to the following rule.
 */
#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 6), \
	/* load and test the low word of the syscall argument */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (unsigned int)((unsigned long long)(_arg_val) & 0xFFFFFFFFULL), \
	    0, 3), \
	/* load and test the high word of the syscall argument */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (unsigned int)(((unsigned long long)(_arg_val) >> 32) & \
	    0xFFFFFFFFULL), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
		offsetof(struct seccomp_data, nr))
93239844Sdes
94239844Sdes/* Syscall filtering set for preauth. */
95239844Sdesstatic const struct sock_filter preauth_insns[] = {
96239844Sdes	/* Ensure the syscall arch convention is as expected. */
97239844Sdes	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
98239844Sdes		offsetof(struct seccomp_data, arch)),
99239844Sdes	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
100239844Sdes	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
101239844Sdes	/* Load the syscall number for checking. */
102239844Sdes	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
103239844Sdes		offsetof(struct seccomp_data, nr)),
104295367Sdes
105295367Sdes	/* Syscalls to non-fatally deny */
106323124Sdes#ifdef __NR_lstat
107323124Sdes	SC_DENY(lstat, EACCES),
108323124Sdes#endif
109323124Sdes#ifdef __NR_lstat64
110323124Sdes	SC_DENY(lstat64, EACCES),
111323124Sdes#endif
112295367Sdes#ifdef __NR_fstat
113295367Sdes	SC_DENY(fstat, EACCES),
114295367Sdes#endif
115295367Sdes#ifdef __NR_fstat64
116295367Sdes	SC_DENY(fstat64, EACCES),
117295367Sdes#endif
118295367Sdes#ifdef __NR_open
119239844Sdes	SC_DENY(open, EACCES),
120295367Sdes#endif
121295367Sdes#ifdef __NR_openat
122295367Sdes	SC_DENY(openat, EACCES),
123295367Sdes#endif
124295367Sdes#ifdef __NR_newfstatat
125295367Sdes	SC_DENY(newfstatat, EACCES),
126295367Sdes#endif
127295367Sdes#ifdef __NR_stat
128295367Sdes	SC_DENY(stat, EACCES),
129295367Sdes#endif
130295367Sdes#ifdef __NR_stat64
131295367Sdes	SC_DENY(stat64, EACCES),
132295367Sdes#endif
133295367Sdes
134295367Sdes	/* Syscalls to permit */
135295367Sdes#ifdef __NR_brk
136295367Sdes	SC_ALLOW(brk),
137295367Sdes#endif
138295367Sdes#ifdef __NR_clock_gettime
139255767Sdes	SC_ALLOW(clock_gettime),
140248613Sdes#endif
141295367Sdes#ifdef __NR_close
142239844Sdes	SC_ALLOW(close),
143264377Sdes#endif
144295367Sdes#ifdef __NR_exit
145295367Sdes	SC_ALLOW(exit),
146239844Sdes#endif
147295367Sdes#ifdef __NR_exit_group
148295367Sdes	SC_ALLOW(exit_group),
149295367Sdes#endif
150295367Sdes#ifdef __NR_getpgid
151295367Sdes	SC_ALLOW(getpgid),
152295367Sdes#endif
153295367Sdes#ifdef __NR_getpid
154295367Sdes	SC_ALLOW(getpid),
155295367Sdes#endif
156296781Sdes#ifdef __NR_getrandom
157296781Sdes	SC_ALLOW(getrandom),
158296781Sdes#endif
159295367Sdes#ifdef __NR_gettimeofday
160295367Sdes	SC_ALLOW(gettimeofday),
161295367Sdes#endif
162295367Sdes#ifdef __NR_madvise
163239844Sdes	SC_ALLOW(madvise),
164248613Sdes#endif
165248613Sdes#ifdef __NR_mmap
166239844Sdes	SC_ALLOW(mmap),
167248613Sdes#endif
168295367Sdes#ifdef __NR_mmap2
169295367Sdes	SC_ALLOW(mmap2),
170295367Sdes#endif
171295367Sdes#ifdef __NR_mremap
172295367Sdes	SC_ALLOW(mremap),
173295367Sdes#endif
174295367Sdes#ifdef __NR_munmap
175239844Sdes	SC_ALLOW(munmap),
176295367Sdes#endif
177295367Sdes#ifdef __NR__newselect
178295367Sdes	SC_ALLOW(_newselect),
179295367Sdes#endif
180295367Sdes#ifdef __NR_poll
181295367Sdes	SC_ALLOW(poll),
182295367Sdes#endif
183295367Sdes#ifdef __NR_pselect6
184295367Sdes	SC_ALLOW(pselect6),
185295367Sdes#endif
186295367Sdes#ifdef __NR_read
187295367Sdes	SC_ALLOW(read),
188295367Sdes#endif
189239844Sdes#ifdef __NR_rt_sigprocmask
190239844Sdes	SC_ALLOW(rt_sigprocmask),
191295367Sdes#endif
192295367Sdes#ifdef __NR_select
193295367Sdes	SC_ALLOW(select),
194295367Sdes#endif
195295367Sdes#ifdef __NR_shutdown
196295367Sdes	SC_ALLOW(shutdown),
197295367Sdes#endif
198295367Sdes#ifdef __NR_sigprocmask
199239844Sdes	SC_ALLOW(sigprocmask),
200239844Sdes#endif
201295367Sdes#ifdef __NR_time
202295367Sdes	SC_ALLOW(time),
203295367Sdes#endif
204295367Sdes#ifdef __NR_write
205295367Sdes	SC_ALLOW(write),
206295367Sdes#endif
207295367Sdes#ifdef __NR_socketcall
208295367Sdes	SC_ALLOW_ARG(socketcall, 0, SYS_SHUTDOWN),
209295367Sdes#endif
210295367Sdes
211295367Sdes	/* Default deny */
212239844Sdes	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
213239844Sdes};
214239844Sdes
215239844Sdesstatic const struct sock_fprog preauth_program = {
216239844Sdes	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
217239844Sdes	.filter = (struct sock_filter *)preauth_insns,
218239844Sdes};
219239844Sdes
/* Sandbox handle; records only the pid of the sandboxed child. */
struct ssh_sandbox {
	pid_t child_pid;	/* 0 until ssh_sandbox_parent_preauth() sets it */
};
223239844Sdes
224239844Sdesstruct ssh_sandbox *
225262566Sdesssh_sandbox_init(struct monitor *monitor)
226239844Sdes{
227239844Sdes	struct ssh_sandbox *box;
228239844Sdes
229239844Sdes	/*
230239844Sdes	 * Strictly, we don't need to maintain any state here but we need
231239844Sdes	 * to return non-NULL to satisfy the API.
232239844Sdes	 */
233239844Sdes	debug3("%s: preparing seccomp filter sandbox", __func__);
234239844Sdes	box = xcalloc(1, sizeof(*box));
235239844Sdes	box->child_pid = 0;
236239844Sdes
237239844Sdes	return box;
238239844Sdes}
239239844Sdes
240239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
241239844Sdesextern struct monitor *pmonitor;
242239844Sdesvoid mm_log_handler(LogLevel level, const char *msg, void *ctx);
243239844Sdes
244239844Sdesstatic void
245239844Sdesssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
246239844Sdes{
247239844Sdes	char msg[256];
248239844Sdes
249239844Sdes	snprintf(msg, sizeof(msg),
250239844Sdes	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
251239844Sdes	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
252239844Sdes	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
253239844Sdes	_exit(1);
254239844Sdes}
255239844Sdes
256239844Sdesstatic void
257239844Sdesssh_sandbox_child_debugging(void)
258239844Sdes{
259239844Sdes	struct sigaction act;
260239844Sdes	sigset_t mask;
261239844Sdes
262239844Sdes	debug3("%s: installing SIGSYS handler", __func__);
263239844Sdes	memset(&act, 0, sizeof(act));
264239844Sdes	sigemptyset(&mask);
265239844Sdes	sigaddset(&mask, SIGSYS);
266239844Sdes
267239844Sdes	act.sa_sigaction = &ssh_sandbox_violation;
268239844Sdes	act.sa_flags = SA_SIGINFO;
269239844Sdes	if (sigaction(SIGSYS, &act, NULL) == -1)
270239844Sdes		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
271239844Sdes	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
272239844Sdes		fatal("%s: sigprocmask(SIGSYS): %s",
273239844Sdes		      __func__, strerror(errno));
274239844Sdes}
275239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
276239844Sdes
277239844Sdesvoid
278239844Sdesssh_sandbox_child(struct ssh_sandbox *box)
279239844Sdes{
280239844Sdes	struct rlimit rl_zero;
281239849Sdes	int nnp_failed = 0;
282239844Sdes
283239844Sdes	/* Set rlimits for completeness if possible. */
284239844Sdes	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
285239844Sdes	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
286239844Sdes		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
287239844Sdes			__func__, strerror(errno));
288239844Sdes	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
289239844Sdes		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
290239844Sdes			__func__, strerror(errno));
291239844Sdes	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
292239844Sdes		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
293239844Sdes			__func__, strerror(errno));
294239844Sdes
295239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
296239844Sdes	ssh_sandbox_child_debugging();
297239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
298239844Sdes
299239844Sdes	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
300239849Sdes	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
301239849Sdes		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
302239844Sdes		      __func__, strerror(errno));
303239849Sdes		nnp_failed = 1;
304239849Sdes	}
305239844Sdes	debug3("%s: attaching seccomp filter program", __func__);
306239844Sdes	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
307239849Sdes		debug("%s: prctl(PR_SET_SECCOMP): %s",
308239844Sdes		      __func__, strerror(errno));
309239849Sdes	else if (nnp_failed)
310239849Sdes		fatal("%s: SECCOMP_MODE_FILTER activated but "
311239849Sdes		    "PR_SET_NO_NEW_PRIVS failed", __func__);
312239844Sdes}
313239844Sdes
/*
 * Release the sandbox handle and log completion.  The pointer must not be
 * used by the caller afterwards.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	free(box);

	debug3("%s: finished", __func__);
}
320239844Sdes
321239844Sdesvoid
322239844Sdesssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
323239844Sdes{
324239844Sdes	box->child_pid = child_pid;
325239844Sdes}
326239844Sdes
327239844Sdes#endif /* SANDBOX_SECCOMP_FILTER */
328