/* sandbox-seccomp-filter.c revision 323136 */
1/* 2 * Copyright (c) 2012 Will Drewry <wad@dataspill.org> 3 * 4 * Permission to use, copy, modify, and distribute this software for any 5 * purpose with or without fee is hereby granted, provided that the above 6 * copyright notice and this permission notice appear in all copies. 7 * 8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 15 */ 16 17/* 18 * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose 19 * filter breakage during development. *Do not* use this in production, 20 * as it relies on making library calls that are unsafe in signal context. 21 * 22 * Instead, live systems the auditctl(8) may be used to monitor failures. 23 * E.g. 24 * auditctl -a task,always -F uid=<privsep uid> 25 */ 26/* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */ 27 28/* XXX it should be possible to do logging via the log socket safely */ 29 30#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 31/* Use the kernel headers in case of an older toolchain. 
*/ 32# include <asm/siginfo.h> 33# define __have_siginfo_t 1 34# define __have_sigval_t 1 35# define __have_sigevent_t 1 36#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 37 38#include "includes.h" 39 40#ifdef SANDBOX_SECCOMP_FILTER 41 42#include <sys/types.h> 43#include <sys/resource.h> 44#include <sys/prctl.h> 45 46#include <linux/net.h> 47#include <linux/audit.h> 48#include <linux/filter.h> 49#include <linux/seccomp.h> 50#include <elf.h> 51 52#include <asm/unistd.h> 53 54#include <errno.h> 55#include <signal.h> 56#include <stdarg.h> 57#include <stddef.h> /* for offsetof */ 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "log.h" 64#include "ssh-sandbox.h" 65#include "xmalloc.h" 66 67/* Linux seccomp_filter sandbox */ 68#define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL 69 70/* Use a signal handler to emit violations when debugging */ 71#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 72# undef SECCOMP_FILTER_FAIL 73# define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP 74#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 75 76#if __BYTE_ORDER == __LITTLE_ENDIAN 77# define ARG_LO_OFFSET 0 78# define ARG_HI_OFFSET sizeof(uint32_t) 79#elif __BYTE_ORDER == __BIG_ENDIAN 80# define ARG_LO_OFFSET sizeof(uint32_t) 81# define ARG_HI_OFFSET 0 82#else 83#error "Unknown endianness" 84#endif 85 86/* Simple helpers to avoid manual errors (but larger BPF programs). 
*/ 87#define SC_DENY(_nr, _errno) \ 88 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ 89 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(_errno)) 90#define SC_ALLOW(_nr) \ 91 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 1), \ 92 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW) 93#define SC_ALLOW_ARG(_nr, _arg_nr, _arg_val) \ 94 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (_nr), 0, 6), \ 95 /* load and test first syscall argument, low word */ \ 96 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 97 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_LO_OFFSET), \ 98 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ 99 ((_arg_val) & 0xFFFFFFFF), 0, 3), \ 100 /* load and test first syscall argument, high word */ \ 101 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 102 offsetof(struct seccomp_data, args[(_arg_nr)]) + ARG_HI_OFFSET), \ 103 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, \ 104 (((uint32_t)((uint64_t)(_arg_val) >> 32)) & 0xFFFFFFFF), 0, 1), \ 105 BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), \ 106 /* reload syscall number; all rules expect it in accumulator */ \ 107 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, \ 108 offsetof(struct seccomp_data, nr)) 109 110/* Syscall filtering set for preauth. */ 111static const struct sock_filter preauth_insns[] = { 112 /* Ensure the syscall arch convention is as expected. */ 113 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 114 offsetof(struct seccomp_data, arch)), 115 BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0), 116 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 117 /* Load the syscall number for checking. 
*/ 118 BPF_STMT(BPF_LD+BPF_W+BPF_ABS, 119 offsetof(struct seccomp_data, nr)), 120 121 /* Syscalls to non-fatally deny */ 122#ifdef __NR_lstat 123 SC_DENY(__NR_lstat, EACCES), 124#endif 125#ifdef __NR_lstat64 126 SC_DENY(__NR_lstat64, EACCES), 127#endif 128#ifdef __NR_fstat 129 SC_DENY(__NR_fstat, EACCES), 130#endif 131#ifdef __NR_fstat64 132 SC_DENY(__NR_fstat64, EACCES), 133#endif 134#ifdef __NR_open 135 SC_DENY(__NR_open, EACCES), 136#endif 137#ifdef __NR_openat 138 SC_DENY(__NR_openat, EACCES), 139#endif 140#ifdef __NR_newfstatat 141 SC_DENY(__NR_newfstatat, EACCES), 142#endif 143#ifdef __NR_stat 144 SC_DENY(__NR_stat, EACCES), 145#endif 146#ifdef __NR_stat64 147 SC_DENY(__NR_stat64, EACCES), 148#endif 149 150 /* Syscalls to permit */ 151#ifdef __NR_brk 152 SC_ALLOW(__NR_brk), 153#endif 154#ifdef __NR_clock_gettime 155 SC_ALLOW(__NR_clock_gettime), 156#endif 157#ifdef __NR_close 158 SC_ALLOW(__NR_close), 159#endif 160#ifdef __NR_exit 161 SC_ALLOW(__NR_exit), 162#endif 163#ifdef __NR_exit_group 164 SC_ALLOW(__NR_exit_group), 165#endif 166#ifdef __NR_getpgid 167 SC_ALLOW(__NR_getpgid), 168#endif 169#ifdef __NR_getpid 170 SC_ALLOW(__NR_getpid), 171#endif 172#ifdef __NR_getrandom 173 SC_ALLOW(__NR_getrandom), 174#endif 175#ifdef __NR_gettimeofday 176 SC_ALLOW(__NR_gettimeofday), 177#endif 178#ifdef __NR_madvise 179 SC_ALLOW(__NR_madvise), 180#endif 181#ifdef __NR_mmap 182 SC_ALLOW(__NR_mmap), 183#endif 184#ifdef __NR_mmap2 185 SC_ALLOW(__NR_mmap2), 186#endif 187#ifdef __NR_mremap 188 SC_ALLOW(__NR_mremap), 189#endif 190#ifdef __NR_munmap 191 SC_ALLOW(__NR_munmap), 192#endif 193#ifdef __NR__newselect 194 SC_ALLOW(__NR__newselect), 195#endif 196#ifdef __NR_poll 197 SC_ALLOW(__NR_poll), 198#endif 199#ifdef __NR_pselect6 200 SC_ALLOW(__NR_pselect6), 201#endif 202#ifdef __NR_read 203 SC_ALLOW(__NR_read), 204#endif 205#ifdef __NR_rt_sigprocmask 206 SC_ALLOW(__NR_rt_sigprocmask), 207#endif 208#ifdef __NR_select 209 SC_ALLOW(__NR_select), 210#endif 211#ifdef __NR_shutdown 
212 SC_ALLOW(__NR_shutdown), 213#endif 214#ifdef __NR_sigprocmask 215 SC_ALLOW(__NR_sigprocmask), 216#endif 217#ifdef __NR_time 218 SC_ALLOW(__NR_time), 219#endif 220#ifdef __NR_write 221 SC_ALLOW(__NR_write), 222#endif 223#ifdef __NR_socketcall 224 SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN), 225#endif 226#if defined(__NR_ioctl) && defined(__s390__) 227 /* Allow ioctls for ICA crypto card on s390 */ 228 SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK), 229 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO), 230 SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT), 231#endif 232#if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT) 233 /* 234 * On Linux x32, the clock_gettime VDSO falls back to the 235 * x86-64 syscall under some circumstances, e.g. 236 * https://bugs.debian.org/849923 237 */ 238 SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT); 239#endif 240 241 /* Default deny */ 242 BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL), 243}; 244 245static const struct sock_fprog preauth_program = { 246 .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])), 247 .filter = (struct sock_filter *)preauth_insns, 248}; 249 250struct ssh_sandbox { 251 pid_t child_pid; 252}; 253 254struct ssh_sandbox * 255ssh_sandbox_init(struct monitor *monitor) 256{ 257 struct ssh_sandbox *box; 258 259 /* 260 * Strictly, we don't need to maintain any state here but we need 261 * to return non-NULL to satisfy the API. 
262 */ 263 debug3("%s: preparing seccomp filter sandbox", __func__); 264 box = xcalloc(1, sizeof(*box)); 265 box->child_pid = 0; 266 267 return box; 268} 269 270#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 271extern struct monitor *pmonitor; 272void mm_log_handler(LogLevel level, const char *msg, void *ctx); 273 274static void 275ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context) 276{ 277 char msg[256]; 278 279 snprintf(msg, sizeof(msg), 280 "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)", 281 __func__, info->si_arch, info->si_syscall, info->si_call_addr); 282 mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor); 283 _exit(1); 284} 285 286static void 287ssh_sandbox_child_debugging(void) 288{ 289 struct sigaction act; 290 sigset_t mask; 291 292 debug3("%s: installing SIGSYS handler", __func__); 293 memset(&act, 0, sizeof(act)); 294 sigemptyset(&mask); 295 sigaddset(&mask, SIGSYS); 296 297 act.sa_sigaction = &ssh_sandbox_violation; 298 act.sa_flags = SA_SIGINFO; 299 if (sigaction(SIGSYS, &act, NULL) == -1) 300 fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno)); 301 if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) 302 fatal("%s: sigprocmask(SIGSYS): %s", 303 __func__, strerror(errno)); 304} 305#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 306 307void 308ssh_sandbox_child(struct ssh_sandbox *box) 309{ 310 struct rlimit rl_zero; 311 int nnp_failed = 0; 312 313 /* Set rlimits for completeness if possible. 
*/ 314 rl_zero.rlim_cur = rl_zero.rlim_max = 0; 315 if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1) 316 fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s", 317 __func__, strerror(errno)); 318 if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1) 319 fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s", 320 __func__, strerror(errno)); 321 if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1) 322 fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s", 323 __func__, strerror(errno)); 324 325#ifdef SANDBOX_SECCOMP_FILTER_DEBUG 326 ssh_sandbox_child_debugging(); 327#endif /* SANDBOX_SECCOMP_FILTER_DEBUG */ 328 329 debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__); 330 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) { 331 debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s", 332 __func__, strerror(errno)); 333 nnp_failed = 1; 334 } 335 debug3("%s: attaching seccomp filter program", __func__); 336 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1) 337 debug("%s: prctl(PR_SET_SECCOMP): %s", 338 __func__, strerror(errno)); 339 else if (nnp_failed) 340 fatal("%s: SECCOMP_MODE_FILTER activated but " 341 "PR_SET_NO_NEW_PRIVS failed", __func__); 342} 343 344void 345ssh_sandbox_parent_finish(struct ssh_sandbox *box) 346{ 347 free(box); 348 debug3("%s: finished", __func__); 349} 350 351void 352ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid) 353{ 354 box->child_pid = child_pid; 355} 356 357#endif /* SANDBOX_SECCOMP_FILTER */ 358