1239844Sdes/*
2239844Sdes * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
3239844Sdes *
4239844Sdes * Permission to use, copy, modify, and distribute this software for any
5239844Sdes * purpose with or without fee is hereby granted, provided that the above
6239844Sdes * copyright notice and this permission notice appear in all copies.
7239844Sdes *
8239844Sdes * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9239844Sdes * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10239844Sdes * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11239844Sdes * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12239844Sdes * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13239844Sdes * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14239844Sdes * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15239844Sdes */
16239844Sdes
17239844Sdes/*
18239844Sdes * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
19239844Sdes * filter breakage during development. *Do not* use this in production,
20239844Sdes * as it relies on making library calls that are unsafe in signal context.
21239844Sdes *
 * Instead, on live systems, auditctl(8) may be used to monitor failures.
23239844Sdes * E.g.
24239844Sdes *   auditctl -a task,always -F uid=<privsep uid>
25239844Sdes */
26239844Sdes/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
27239844Sdes
28295367Sdes/* XXX it should be possible to do logging via the log socket safely */
29295367Sdes
30239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
31239844Sdes/* Use the kernel headers in case of an older toolchain. */
32239844Sdes# include <asm/siginfo.h>
33239844Sdes# define __have_siginfo_t 1
34239844Sdes# define __have_sigval_t 1
35239844Sdes# define __have_sigevent_t 1
36239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
37239844Sdes
38239844Sdes#include "includes.h"
39239844Sdes
40239844Sdes#ifdef SANDBOX_SECCOMP_FILTER
41239844Sdes
42239844Sdes#include <sys/types.h>
43239844Sdes#include <sys/resource.h>
44239844Sdes#include <sys/prctl.h>
45239844Sdes
46295367Sdes#include <linux/net.h>
47239844Sdes#include <linux/audit.h>
48239844Sdes#include <linux/filter.h>
49239844Sdes#include <linux/seccomp.h>
50248613Sdes#include <elf.h>
51239844Sdes
52239844Sdes#include <asm/unistd.h>
53239844Sdes
54239844Sdes#include <errno.h>
55239844Sdes#include <signal.h>
56239844Sdes#include <stdarg.h>
57239844Sdes#include <stddef.h>  /* for offsetof */
58239844Sdes#include <stdio.h>
59239844Sdes#include <stdlib.h>
60239844Sdes#include <string.h>
61239844Sdes#include <unistd.h>
62239844Sdes
63239844Sdes#include "log.h"
64239844Sdes#include "ssh-sandbox.h"
65239844Sdes#include "xmalloc.h"
66239844Sdes
67239844Sdes/* Linux seccomp_filter sandbox */
68239844Sdes#define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
69239844Sdes
70239844Sdes/* Use a signal handler to emit violations when debugging */
71239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
72239844Sdes# undef SECCOMP_FILTER_FAIL
73239844Sdes# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
74239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
75239844Sdes
/*
 * Simple helpers to avoid manual errors (but larger BPF programs).
 *
 * Every helper expects the syscall number to be in the BPF accumulator on
 * entry and leaves it there when the rule does not match, so rules can be
 * listed one after another in a filter program.
 */

/*
 * seccomp_data.args[] entries are 64 bits wide but classic BPF loads are
 * 32 bits wide, so each argument is examined as two words.  Which word
 * sits at the lower offset depends on host byte order.  If the byte order
 * cannot be determined from the compiler/libc macros, the little-endian
 * layout is assumed.
 */
#if (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) || \
    (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
    __BYTE_ORDER == __BIG_ENDIAN)
# define SC_ARG_LO_OFFSET	4
# define SC_ARG_HI_OFFSET	0
#else
# define SC_ARG_LO_OFFSET	0
# define SC_ARG_HI_OFFSET	4
#endif

/* Fail syscall _nr with errno _errno instead of killing the process. */
#define SC_DENY(_nr, _errno) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno))

/* Permit syscall _nr unconditionally. */
#define SC_ALLOW(_nr) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)

/*
 * Permit syscall _nr only when argument number _arg_nr (0-based) equals
 * _arg_val.  Both 32-bit halves of the 64-bit argument slot are compared;
 * _arg_val is widened to 64 bits first, so a negative constant is
 * sign-extended before the comparison.  On a mismatch the syscall number
 * is reloaded and evaluation falls through to the next rule.
 */
#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_ ## _nr, 0, 6), \
	/* load and test the low word of the selected argument */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + \
	    SC_ARG_LO_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (unsigned int)((unsigned long long)(_arg_val) & 0xFFFFFFFFULL), \
	    0, 3), \
	/* load and test the high word of the selected argument */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
	    offsetof(struct seccomp_data, args[(_arg_nr)]) + \
	    SC_ARG_HI_OFFSET), \
	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \
	    (unsigned int)((unsigned long long)(_arg_val) >> 32), 0, 1), \
	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \
	/* reload syscall number; all rules expect it in accumulator */ \
	BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \
		offsetof(struct seccomp_data, nr))
93239844Sdes
94239844Sdes/* Syscall filtering set for preauth. */
95239844Sdesstatic const struct sock_filter preauth_insns[] = {
96239844Sdes	/* Ensure the syscall arch convention is as expected. */
97239844Sdes	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
98239844Sdes		offsetof(struct seccomp_data, arch)),
99239844Sdes	BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
100239844Sdes	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
101239844Sdes	/* Load the syscall number for checking. */
102239844Sdes	BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
103239844Sdes		offsetof(struct seccomp_data, nr)),
104295367Sdes
105295367Sdes	/* Syscalls to non-fatally deny */
106295367Sdes#ifdef __NR_fstat
107295367Sdes	SC_DENY(fstat, EACCES),
108295367Sdes#endif
109295367Sdes#ifdef __NR_fstat64
110295367Sdes	SC_DENY(fstat64, EACCES),
111295367Sdes#endif
112295367Sdes#ifdef __NR_open
113239844Sdes	SC_DENY(open, EACCES),
114295367Sdes#endif
115295367Sdes#ifdef __NR_openat
116295367Sdes	SC_DENY(openat, EACCES),
117295367Sdes#endif
118295367Sdes#ifdef __NR_newfstatat
119295367Sdes	SC_DENY(newfstatat, EACCES),
120295367Sdes#endif
121295367Sdes#ifdef __NR_stat
122295367Sdes	SC_DENY(stat, EACCES),
123295367Sdes#endif
124295367Sdes#ifdef __NR_stat64
125295367Sdes	SC_DENY(stat64, EACCES),
126295367Sdes#endif
127295367Sdes
128295367Sdes	/* Syscalls to permit */
129295367Sdes#ifdef __NR_brk
130295367Sdes	SC_ALLOW(brk),
131295367Sdes#endif
132295367Sdes#ifdef __NR_clock_gettime
133255767Sdes	SC_ALLOW(clock_gettime),
134248613Sdes#endif
135295367Sdes#ifdef __NR_close
136239844Sdes	SC_ALLOW(close),
137264377Sdes#endif
138295367Sdes#ifdef __NR_exit
139295367Sdes	SC_ALLOW(exit),
140239844Sdes#endif
141295367Sdes#ifdef __NR_exit_group
142295367Sdes	SC_ALLOW(exit_group),
143295367Sdes#endif
144295367Sdes#ifdef __NR_getpgid
145295367Sdes	SC_ALLOW(getpgid),
146295367Sdes#endif
147295367Sdes#ifdef __NR_getpid
148295367Sdes	SC_ALLOW(getpid),
149295367Sdes#endif
150296853Sdes#ifdef __NR_getrandom
151296853Sdes	SC_ALLOW(getrandom),
152296853Sdes#endif
153295367Sdes#ifdef __NR_gettimeofday
154295367Sdes	SC_ALLOW(gettimeofday),
155295367Sdes#endif
156295367Sdes#ifdef __NR_madvise
157239844Sdes	SC_ALLOW(madvise),
158248613Sdes#endif
159248613Sdes#ifdef __NR_mmap
160239844Sdes	SC_ALLOW(mmap),
161248613Sdes#endif
162295367Sdes#ifdef __NR_mmap2
163295367Sdes	SC_ALLOW(mmap2),
164295367Sdes#endif
165295367Sdes#ifdef __NR_mremap
166295367Sdes	SC_ALLOW(mremap),
167295367Sdes#endif
168295367Sdes#ifdef __NR_munmap
169239844Sdes	SC_ALLOW(munmap),
170295367Sdes#endif
171295367Sdes#ifdef __NR__newselect
172295367Sdes	SC_ALLOW(_newselect),
173295367Sdes#endif
174295367Sdes#ifdef __NR_poll
175295367Sdes	SC_ALLOW(poll),
176295367Sdes#endif
177295367Sdes#ifdef __NR_pselect6
178295367Sdes	SC_ALLOW(pselect6),
179295367Sdes#endif
180295367Sdes#ifdef __NR_read
181295367Sdes	SC_ALLOW(read),
182295367Sdes#endif
183239844Sdes#ifdef __NR_rt_sigprocmask
184239844Sdes	SC_ALLOW(rt_sigprocmask),
185295367Sdes#endif
186295367Sdes#ifdef __NR_select
187295367Sdes	SC_ALLOW(select),
188295367Sdes#endif
189295367Sdes#ifdef __NR_shutdown
190295367Sdes	SC_ALLOW(shutdown),
191295367Sdes#endif
192295367Sdes#ifdef __NR_sigprocmask
193239844Sdes	SC_ALLOW(sigprocmask),
194239844Sdes#endif
195295367Sdes#ifdef __NR_time
196295367Sdes	SC_ALLOW(time),
197295367Sdes#endif
198295367Sdes#ifdef __NR_write
199295367Sdes	SC_ALLOW(write),
200295367Sdes#endif
201295367Sdes#ifdef __NR_socketcall
202295367Sdes	SC_ALLOW_ARG(socketcall, 0, SYS_SHUTDOWN),
203295367Sdes#endif
204295367Sdes
205295367Sdes	/* Default deny */
206239844Sdes	BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
207239844Sdes};
208239844Sdes
209239844Sdesstatic const struct sock_fprog preauth_program = {
210239844Sdes	.len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
211239844Sdes	.filter = (struct sock_filter *)preauth_insns,
212239844Sdes};
213239844Sdes
/* Per-sandbox state for the seccomp filter backend. */
struct ssh_sandbox {
	/* Zero after init; set by ssh_sandbox_parent_preauth(). */
	pid_t child_pid;
};
217239844Sdes
218239844Sdesstruct ssh_sandbox *
219262566Sdesssh_sandbox_init(struct monitor *monitor)
220239844Sdes{
221239844Sdes	struct ssh_sandbox *box;
222239844Sdes
223239844Sdes	/*
224239844Sdes	 * Strictly, we don't need to maintain any state here but we need
225239844Sdes	 * to return non-NULL to satisfy the API.
226239844Sdes	 */
227239844Sdes	debug3("%s: preparing seccomp filter sandbox", __func__);
228239844Sdes	box = xcalloc(1, sizeof(*box));
229239844Sdes	box->child_pid = 0;
230239844Sdes
231239844Sdes	return box;
232239844Sdes}
233239844Sdes
234239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
235239844Sdesextern struct monitor *pmonitor;
236239844Sdesvoid mm_log_handler(LogLevel level, const char *msg, void *ctx);
237239844Sdes
238239844Sdesstatic void
239239844Sdesssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
240239844Sdes{
241239844Sdes	char msg[256];
242239844Sdes
243239844Sdes	snprintf(msg, sizeof(msg),
244239844Sdes	    "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
245239844Sdes	    __func__, info->si_arch, info->si_syscall, info->si_call_addr);
246239844Sdes	mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
247239844Sdes	_exit(1);
248239844Sdes}
249239844Sdes
250239844Sdesstatic void
251239844Sdesssh_sandbox_child_debugging(void)
252239844Sdes{
253239844Sdes	struct sigaction act;
254239844Sdes	sigset_t mask;
255239844Sdes
256239844Sdes	debug3("%s: installing SIGSYS handler", __func__);
257239844Sdes	memset(&act, 0, sizeof(act));
258239844Sdes	sigemptyset(&mask);
259239844Sdes	sigaddset(&mask, SIGSYS);
260239844Sdes
261239844Sdes	act.sa_sigaction = &ssh_sandbox_violation;
262239844Sdes	act.sa_flags = SA_SIGINFO;
263239844Sdes	if (sigaction(SIGSYS, &act, NULL) == -1)
264239844Sdes		fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
265239844Sdes	if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
266239844Sdes		fatal("%s: sigprocmask(SIGSYS): %s",
267239844Sdes		      __func__, strerror(errno));
268239844Sdes}
269239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
270239844Sdes
271239844Sdesvoid
272239844Sdesssh_sandbox_child(struct ssh_sandbox *box)
273239844Sdes{
274239844Sdes	struct rlimit rl_zero;
275239849Sdes	int nnp_failed = 0;
276239844Sdes
277239844Sdes	/* Set rlimits for completeness if possible. */
278239844Sdes	rl_zero.rlim_cur = rl_zero.rlim_max = 0;
279239844Sdes	if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
280239844Sdes		fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
281239844Sdes			__func__, strerror(errno));
282239844Sdes	if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
283239844Sdes		fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
284239844Sdes			__func__, strerror(errno));
285239844Sdes	if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
286239844Sdes		fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
287239844Sdes			__func__, strerror(errno));
288239844Sdes
289239844Sdes#ifdef SANDBOX_SECCOMP_FILTER_DEBUG
290239844Sdes	ssh_sandbox_child_debugging();
291239844Sdes#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
292239844Sdes
293239844Sdes	debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
294239849Sdes	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
295239849Sdes		debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
296239844Sdes		      __func__, strerror(errno));
297239849Sdes		nnp_failed = 1;
298239849Sdes	}
299239844Sdes	debug3("%s: attaching seccomp filter program", __func__);
300239844Sdes	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
301239849Sdes		debug("%s: prctl(PR_SET_SECCOMP): %s",
302239844Sdes		      __func__, strerror(errno));
303239849Sdes	else if (nnp_failed)
304239849Sdes		fatal("%s: SECCOMP_MODE_FILTER activated but "
305239849Sdes		    "PR_SET_NO_NEW_PRIVS failed", __func__);
306239844Sdes}
307239844Sdes
/*
 * Parent-side teardown: release the handle and log completion.
 * box must not be used after this call.
 */
void
ssh_sandbox_parent_finish(struct ssh_sandbox *box)
{
	free(box);

	debug3("%s: finished", __func__);
}
314239844Sdes
315239844Sdesvoid
316239844Sdesssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
317239844Sdes{
318239844Sdes	box->child_pid = child_pid;
319239844Sdes}
320239844Sdes
321239844Sdes#endif /* SANDBOX_SECCOMP_FILTER */
322