1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 1999 Marcel Moolenaar
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/param.h>
30#include <sys/lock.h>
31#include <sys/malloc.h>
32#include <sys/mount.h>
33#include <sys/jail.h>
34#include <sys/proc.h>
35#include <sys/sx.h>
36
37#include <compat/linux/linux_mib.h>
38#include <compat/linux/linux_misc.h>
39
/*
 * Linux identity information kept per prison: the OS name and release
 * strings, the OSS version, and the release encoded as an integer.
 */
struct linux_prison {
	/* OS name string, at most LINUX_MAX_UTSNAME bytes including the NUL. */
	char	pr_osname[LINUX_MAX_UTSNAME];
	/* OS release string, at most LINUX_MAX_UTSNAME bytes including the NUL. */
	char	pr_osrelease[LINUX_MAX_UTSNAME];
	/* OSS version number. */
	int	pr_oss_version;
	/* Integer form of pr_osrelease, as produced by linux_map_osrel(). */
	int	pr_osrel;
};
46
/*
 * Linux info used for prison0.  linux_find_prison() returns this record
 * when its walk up the prison hierarchy reaches &prison0.
 */
static struct linux_prison lprison0 = {
	.pr_osname =		"Linux",
	.pr_osrelease =		LINUX_VERSION_STR,
	.pr_oss_version =	0x030600,
	.pr_osrel =		LINUX_VERSION_CODE
};

/*
 * Jail OSD slot holding each prison's struct linux_prison; assigned from
 * osd_jail_register() in linux_osd_jail_register().
 */
static unsigned linux_osd_jail_slot;
55
/* Root of the compat.linux sysctl tree; the knobs below hang off it. */
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Linux mode");

/* compat.linux.debug: warning logging level, 0 disables (default 3). */
int linux_debug = 3;
SYSCTL_INT(_compat_linux, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linux_debug, 0, "Log warnings from linux(4); or 0 to disable");

/* compat.linux.default_openfiles: default soft openfiles limit (1024). */
int linux_default_openfiles = 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_openfiles, CTLFLAG_RWTUN,
    &linux_default_openfiles, 0,
    "Default soft openfiles resource limit, or -1 for unlimited");

/* compat.linux.default_stacksize: default soft stack limit (8 MiB). */
int linux_default_stacksize = 8 * 1024 * 1024;
SYSCTL_INT(_compat_linux, OID_AUTO, default_stacksize, CTLFLAG_RWTUN,
    &linux_default_stacksize, 0,
    "Default soft stack size resource limit, or -1 for unlimited");

/* compat.linux.dummy_rlimits: off by default. */
int linux_dummy_rlimits = 0;
SYSCTL_INT(_compat_linux, OID_AUTO, dummy_rlimits, CTLFLAG_RWTUN,
    &linux_dummy_rlimits, 0,
    "Return dummy values for unsupported Linux-specific rlimits");

/* compat.linux.ignore_ip_recverr: on by default. */
int linux_ignore_ip_recverr = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, ignore_ip_recverr, CTLFLAG_RWTUN,
    &linux_ignore_ip_recverr, 0, "Ignore enabling IP_RECVERR");

/* compat.linux.preserve_vstatus: on by default. */
int linux_preserve_vstatus = 1;
SYSCTL_INT(_compat_linux, OID_AUTO, preserve_vstatus, CTLFLAG_RWTUN,
    &linux_preserve_vstatus, 0, "Preserve VSTATUS termios(4) flag");

/*
 * compat.linux.map_sched_prio: on by default.  Unlike the other knobs here
 * it is declared CTLFLAG_RDTUN rather than CTLFLAG_RWTUN.
 */
bool linux_map_sched_prio = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, map_sched_prio, CTLFLAG_RDTUN,
    &linux_map_sched_prio, 0, "Map scheduler priorities to Linux priorities "
    "(not POSIX compliant)");

/*
 * compat.linux.setid_allowed: on by default.  File-local; other code reads
 * it through linux_setid_allowed_query().
 */
static bool linux_setid_allowed = true;
SYSCTL_BOOL(_compat_linux, OID_AUTO, setid_allowed, CTLFLAG_RWTUN,
    &linux_setid_allowed, 0,
    "Allow setuid/setgid on execve of Linux binary");
95
96int
97linux_setid_allowed_query(struct thread *td __unused,
98    struct image_params *imgp __unused)
99{
100	return (linux_setid_allowed);
101}
102
/*
 * Forward declarations of the setters used by the sysctl handlers below;
 * the definitions appear later in this file.
 */
static int	linux_set_osname(struct thread *td, char *osname);
static int	linux_set_osrelease(struct thread *td, char *osrelease);
static int	linux_set_oss_version(struct thread *td, int oss_version);
106
107static int
108linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
109{
110	char osname[LINUX_MAX_UTSNAME];
111	int error;
112
113	linux_get_osname(req->td, osname);
114	error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
115	if (error != 0 || req->newptr == NULL)
116		return (error);
117	error = linux_set_osname(req->td, osname);
118
119	return (error);
120}
121
122SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
123	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
124	    0, 0, linux_sysctl_osname, "A",
125	    "Linux kernel OS name");
126
127static int
128linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
129{
130	char osrelease[LINUX_MAX_UTSNAME];
131	int error;
132
133	linux_get_osrelease(req->td, osrelease);
134	error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
135	if (error != 0 || req->newptr == NULL)
136		return (error);
137	error = linux_set_osrelease(req->td, osrelease);
138
139	return (error);
140}
141
142SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
143	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
144	    0, 0, linux_sysctl_osrelease, "A",
145	    "Linux kernel OS release");
146
147static int
148linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
149{
150	int oss_version;
151	int error;
152
153	oss_version = linux_get_oss_version(req->td);
154	error = sysctl_handle_int(oidp, &oss_version, 0, req);
155	if (error != 0 || req->newptr == NULL)
156		return (error);
157	error = linux_set_oss_version(req->td, oss_version);
158
159	return (error);
160}
161
162SYSCTL_PROC(_compat_linux, OID_AUTO, oss_version,
163	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
164	    0, 0, linux_sysctl_oss_version, "I",
165	    "Linux OSS version");
166
/*
 * Map an osrelease string into its integer form.
 *
 * The string must look like "<major>.<minor>.<patch>", optionally followed
 * by '-' and at least one more character.  Each numeric component must lie
 * in the range [0, 9999]: strtol() accepts a leading sign and returns a
 * long, so without this check negative or truncated values would be fed to
 * LINUX_KERNVER().
 *
 * On success, LINUX_KERNVER(major, minor, patch) is stored through osrel
 * (when osrel is not NULL) and 0 is returned.  EINVAL is returned, and
 * *osrel is left untouched, when the string is malformed, a component is
 * out of range, or the result is below LINUX_KERNVER(1, 0, 0).
 */
static int
linux_map_osrel(char *osrelease, int *osrel)
{
	/* Largest value accepted for any single version component. */
	const long partmax = 9999;
	char *sep, *eosrelease;
	long v0, v1, v2;
	int v;

	eosrelease = osrelease + strlen(osrelease);

	/* Major: digits followed by '.' and at least one more character. */
	v0 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Minor: same shape as major. */
	osrelease = sep + 1;
	v1 = strtol(osrelease, &sep, 10);
	if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
		return (EINVAL);

	/* Patch: may end the string or be followed by "-<something>". */
	osrelease = sep + 1;
	v2 = strtol(osrelease, &sep, 10);
	if (osrelease == sep ||
	    (sep != eosrelease && (sep + 1 >= eosrelease || *sep != '-')))
		return (EINVAL);

	/* Reject signed or oversized components before narrowing to int. */
	if (v0 < 0 || v0 > partmax || v1 < 0 || v1 > partmax ||
	    v2 < 0 || v2 > partmax)
		return (EINVAL);

	v = LINUX_KERNVER((int)v0, (int)v1, (int)v2);
	if (v < LINUX_KERNVER(1, 0, 0))
		return (EINVAL);

	if (osrel != NULL)
		*osrel = v;

	return (0);
}
200
201/*
202 * Find a prison with Linux info.
203 * Return the Linux info and the (locked) prison.
204 */
205static struct linux_prison *
206linux_find_prison(struct prison *spr, struct prison **prp)
207{
208	struct prison *pr;
209	struct linux_prison *lpr;
210
211	for (pr = spr;; pr = pr->pr_parent) {
212		mtx_lock(&pr->pr_mtx);
213		lpr = (pr == &prison0)
214		    ? &lprison0
215		    : osd_jail_get(pr, linux_osd_jail_slot);
216		if (lpr != NULL)
217			break;
218		mtx_unlock(&pr->pr_mtx);
219	}
220	*prp = pr;
221
222	return (lpr);
223}
224
/*
 * Ensure a prison has its own Linux info.  If lprp is non-null, point it to
 * the Linux info and lock the prison.
 *
 * With lprp non-NULL the function returns with pr->pr_mtx held and the
 * caller must release it; with lprp NULL the mutex is released before
 * returning.  A newly created record starts as a copy of the nearest
 * ancestor's Linux info.
 */
static void
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
	struct prison *ppr;
	struct linux_prison *lpr, *nlpr;
	void **rsv;

	/* If this prison already has Linux info, return that. */
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr)
		goto done;
	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.
	 *
	 * The mutex returned locked by linux_find_prison() is dropped first,
	 * so the allocation and OSD reservation run with no prison mutex held.
	 */
	mtx_unlock(&ppr->pr_mtx);
	nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
	rsv = osd_reserve(linux_osd_jail_slot);
	lpr = linux_find_prison(pr, &ppr);
	if (ppr == pr) {
		/* The prison gained its own info meanwhile; discard ours. */
		free(nlpr, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/*
	 * Inherit the initial values from the ancestor.
	 *
	 * The ancestor (ppr) is still locked from linux_find_prison(); pr is
	 * locked as well while the new record is installed and filled in,
	 * then only the ancestor's lock is released.
	 */
	mtx_lock(&pr->pr_mtx);
	(void)osd_jail_set_reserved(pr, linux_osd_jail_slot, rsv, nlpr);
	bcopy(lpr, nlpr, sizeof(*lpr));
	lpr = nlpr;
	mtx_unlock(&ppr->pr_mtx);
 done:
	/* On every path reaching here pr->pr_mtx is held. */
	if (lprp != NULL)
		*lprp = lpr;
	else
		mtx_unlock(&pr->pr_mtx);
}
265
266/*
267 * Jail OSD methods for Linux prison data.
268 */
269static int
270linux_prison_create(void *obj, void *data)
271{
272	struct prison *pr = obj;
273	struct vfsoptlist *opts = data;
274	int jsys;
275
276	if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
277	    jsys == JAIL_SYS_INHERIT)
278		return (0);
279	/*
280	 * Inherit a prison's initial values from its parent
281	 * (different from JAIL_SYS_INHERIT which also inherits changes).
282	 */
283	linux_alloc_prison(pr, NULL);
284	return (0);
285}
286
287static int
288linux_prison_check(void *obj __unused, void *data)
289{
290	struct vfsoptlist *opts = data;
291	char *osname, *osrelease;
292	int error, jsys, len, oss_version;
293
294	/* Check that the parameters are correct. */
295	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
296	if (error != ENOENT) {
297		if (error != 0)
298			return (error);
299		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
300			return (EINVAL);
301	}
302	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
303	if (error != ENOENT) {
304		if (error != 0)
305			return (error);
306		if (len == 0 || osname[len - 1] != '\0')
307			return (EINVAL);
308		if (len > LINUX_MAX_UTSNAME) {
309			vfs_opterror(opts, "linux.osname too long");
310			return (ENAMETOOLONG);
311		}
312	}
313	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
314	if (error != ENOENT) {
315		if (error != 0)
316			return (error);
317		if (len == 0 || osrelease[len - 1] != '\0')
318			return (EINVAL);
319		if (len > LINUX_MAX_UTSNAME) {
320			vfs_opterror(opts, "linux.osrelease too long");
321			return (ENAMETOOLONG);
322		}
323		error = linux_map_osrel(osrelease, NULL);
324		if (error != 0) {
325			vfs_opterror(opts, "linux.osrelease format error");
326			return (error);
327		}
328	}
329	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
330	    sizeof(oss_version));
331
332	if (error == ENOENT)
333		error = 0;
334	return (error);
335}
336
337static int
338linux_prison_set(void *obj, void *data)
339{
340	struct linux_prison *lpr;
341	struct prison *pr = obj;
342	struct vfsoptlist *opts = data;
343	char *osname, *osrelease;
344	int error, gotversion, jsys, len, oss_version;
345
346	/* Set the parameters, which should be correct. */
347	error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
348	if (error == ENOENT)
349		jsys = -1;
350	error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
351	if (error == ENOENT)
352		osname = NULL;
353	else
354		jsys = JAIL_SYS_NEW;
355	error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
356	if (error == ENOENT)
357		osrelease = NULL;
358	else
359		jsys = JAIL_SYS_NEW;
360	error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
361	    sizeof(oss_version));
362	if (error == ENOENT)
363		gotversion = 0;
364	else {
365		gotversion = 1;
366		jsys = JAIL_SYS_NEW;
367	}
368	switch (jsys) {
369	case JAIL_SYS_INHERIT:
370		/* "linux=inherit": inherit the parent's Linux info. */
371		mtx_lock(&pr->pr_mtx);
372		osd_jail_del(pr, linux_osd_jail_slot);
373		mtx_unlock(&pr->pr_mtx);
374		break;
375	case JAIL_SYS_NEW:
376		/*
377		 * "linux=new" or "linux.*":
378		 * the prison gets its own Linux info.
379		 */
380		linux_alloc_prison(pr, &lpr);
381		if (osrelease) {
382			(void)linux_map_osrel(osrelease, &lpr->pr_osrel);
383			strlcpy(lpr->pr_osrelease, osrelease,
384			    LINUX_MAX_UTSNAME);
385		}
386		if (osname)
387			strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
388		if (gotversion)
389			lpr->pr_oss_version = oss_version;
390		mtx_unlock(&pr->pr_mtx);
391	}
392
393	return (0);
394}
395
/*
 * Jail parameter declarations for "linux" and its osname, osrelease and
 * oss_version children; these are the option names handled by the
 * linux_prison_check/set/get methods in this file.
 */
SYSCTL_JAIL_PARAM_SYS_NODE(linux, CTLFLAG_RW, "Jail Linux parameters");
SYSCTL_JAIL_PARAM_STRING(_linux, osname, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS name");
SYSCTL_JAIL_PARAM_STRING(_linux, osrelease, CTLFLAG_RW, LINUX_MAX_UTSNAME,
    "Jail Linux kernel OS release");
SYSCTL_JAIL_PARAM(_linux, oss_version, CTLTYPE_INT | CTLFLAG_RW,
    "I", "Jail Linux OSS version");
403
404static int
405linux_prison_get(void *obj, void *data)
406{
407	struct linux_prison *lpr;
408	struct prison *ppr;
409	struct prison *pr = obj;
410	struct vfsoptlist *opts = data;
411	int error, i;
412
413	static int version0;
414
415	/* See if this prison is the one with the Linux info. */
416	lpr = linux_find_prison(pr, &ppr);
417	i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
418	error = vfs_setopt(opts, "linux", &i, sizeof(i));
419	if (error != 0 && error != ENOENT)
420		goto done;
421	if (i) {
422		error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
423		if (error != 0 && error != ENOENT)
424			goto done;
425		error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
426		if (error != 0 && error != ENOENT)
427			goto done;
428		error = vfs_setopt(opts, "linux.oss_version",
429		    &lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
430		if (error != 0 && error != ENOENT)
431			goto done;
432	} else {
433		/*
434		 * If this prison is inheriting its Linux info, report
435		 * empty/zero parameters.
436		 */
437		error = vfs_setopts(opts, "linux.osname", "");
438		if (error != 0 && error != ENOENT)
439			goto done;
440		error = vfs_setopts(opts, "linux.osrelease", "");
441		if (error != 0 && error != ENOENT)
442			goto done;
443		error = vfs_setopt(opts, "linux.oss_version", &version0,
444		    sizeof(lpr->pr_oss_version));
445		if (error != 0 && error != ENOENT)
446			goto done;
447	}
448	error = 0;
449
450 done:
451	mtx_unlock(&ppr->pr_mtx);
452
453	return (error);
454}
455
456static void
457linux_prison_destructor(void *data)
458{
459
460	free(data, M_PRISON);
461}
462
463void
464linux_osd_jail_register(void)
465{
466	struct prison *pr;
467	osd_method_t methods[PR_MAXMETHOD] = {
468	    [PR_METHOD_CREATE] =	linux_prison_create,
469	    [PR_METHOD_GET] =		linux_prison_get,
470	    [PR_METHOD_SET] =		linux_prison_set,
471	    [PR_METHOD_CHECK] =		linux_prison_check
472	};
473
474	linux_osd_jail_slot =
475	    osd_jail_register(linux_prison_destructor, methods);
476	/* Copy the system Linux info to any current prisons. */
477	sx_slock(&allprison_lock);
478	TAILQ_FOREACH(pr, &allprison, pr_list)
479		linux_alloc_prison(pr, NULL);
480	sx_sunlock(&allprison_lock);
481}
482
483void
484linux_osd_jail_deregister(void)
485{
486
487	osd_jail_deregister(linux_osd_jail_slot);
488}
489
490void
491linux_get_osname(struct thread *td, char *dst)
492{
493	struct prison *pr;
494	struct linux_prison *lpr;
495
496	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
497	bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
498	mtx_unlock(&pr->pr_mtx);
499}
500
501static int
502linux_set_osname(struct thread *td, char *osname)
503{
504	struct prison *pr;
505	struct linux_prison *lpr;
506
507	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
508	strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
509	mtx_unlock(&pr->pr_mtx);
510
511	return (0);
512}
513
514void
515linux_get_osrelease(struct thread *td, char *dst)
516{
517	struct prison *pr;
518	struct linux_prison *lpr;
519
520	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
521	bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
522	mtx_unlock(&pr->pr_mtx);
523}
524
525int
526linux_kernver(struct thread *td)
527{
528	struct prison *pr;
529	struct linux_prison *lpr;
530	int osrel;
531
532	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
533	osrel = lpr->pr_osrel;
534	mtx_unlock(&pr->pr_mtx);
535
536	return (osrel);
537}
538
539static int
540linux_set_osrelease(struct thread *td, char *osrelease)
541{
542	struct prison *pr;
543	struct linux_prison *lpr;
544	int error;
545
546	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
547	error = linux_map_osrel(osrelease, &lpr->pr_osrel);
548	if (error == 0)
549		strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
550	mtx_unlock(&pr->pr_mtx);
551
552	return (error);
553}
554
555int
556linux_get_oss_version(struct thread *td)
557{
558	struct prison *pr;
559	struct linux_prison *lpr;
560	int version;
561
562	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
563	version = lpr->pr_oss_version;
564	mtx_unlock(&pr->pr_mtx);
565
566	return (version);
567}
568
569static int
570linux_set_oss_version(struct thread *td, int oss_version)
571{
572	struct prison *pr;
573	struct linux_prison *lpr;
574
575	lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
576	lpr->pr_oss_version = oss_version;
577	mtx_unlock(&pr->pr_mtx);
578
579	return (0);
580}
581