1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * sysctl.c: General linux system control interface
4 *
5 * Begun 24 March 1995, Stephen Tweedie
6 * Added /proc support, Dec 1995
7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
10 * Dynamic registration fixes, Stephen Tweedie.
11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
13 *  Horn.
14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
17 *  Wendling.
18 * The list_for_each() macro wasn't appropriate for the sysctl loop.
19 *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
20 */
21
22#include <linux/module.h>
23#include <linux/mm.h>
24#include <linux/swap.h>
25#include <linux/slab.h>
26#include <linux/sysctl.h>
27#include <linux/bitmap.h>
28#include <linux/signal.h>
29#include <linux/panic.h>
30#include <linux/printk.h>
31#include <linux/proc_fs.h>
32#include <linux/security.h>
33#include <linux/ctype.h>
34#include <linux/kmemleak.h>
35#include <linux/filter.h>
36#include <linux/fs.h>
37#include <linux/init.h>
38#include <linux/kernel.h>
39#include <linux/kobject.h>
40#include <linux/net.h>
41#include <linux/sysrq.h>
42#include <linux/highuid.h>
43#include <linux/writeback.h>
44#include <linux/ratelimit.h>
45#include <linux/compaction.h>
46#include <linux/hugetlb.h>
47#include <linux/initrd.h>
48#include <linux/key.h>
49#include <linux/times.h>
50#include <linux/limits.h>
51#include <linux/dcache.h>
52#include <linux/syscalls.h>
53#include <linux/vmstat.h>
54#include <linux/nfs_fs.h>
55#include <linux/acpi.h>
56#include <linux/reboot.h>
57#include <linux/ftrace.h>
58#include <linux/perf_event.h>
59#include <linux/oom.h>
60#include <linux/kmod.h>
61#include <linux/capability.h>
62#include <linux/binfmts.h>
63#include <linux/sched/sysctl.h>
64#include <linux/mount.h>
65#include <linux/userfaultfd_k.h>
66#include <linux/pid.h>
67
68#include "../lib/kstrtox.h"
69
70#include <linux/uaccess.h>
71#include <asm/processor.h>
72
73#ifdef CONFIG_X86
74#include <asm/nmi.h>
75#include <asm/stacktrace.h>
76#include <asm/io.h>
77#endif
78#ifdef CONFIG_SPARC
79#include <asm/setup.h>
80#endif
81#ifdef CONFIG_RT_MUTEXES
82#include <linux/rtmutex.h>
83#endif
84
85#if defined(CONFIG_SYSCTL)
86
/*
 * Constants used for minimum and maximum.
 *
 * They are real objects rather than macros; presumably so that their
 * address can be stored in a sysctl table entry - the users are not
 * visible in this part of the file (TODO confirm).
 */

#ifdef CONFIG_PERF_EVENTS
/* 640 * 1024; only built when CONFIG_PERF_EVENTS is set. */
static const int six_hundred_forty_kb = 640 * 1024;
#endif


/* File-local copies of NGROUPS_MAX and CAP_LAST_CAP. */
static const int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
96
#ifdef CONFIG_PROC_SYSCTL

/**
 * enum sysctl_writes_mode - supported sysctl write modes
 *
 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
 *	to be written, and multiple writes on the same sysctl file descriptor
 *	will rewrite the sysctl value, regardless of file position. No warning
 *	is issued when the initial position is not 0.
 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
 *	not 0.
 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
 *	file position 0 and the value must be fully contained in the buffer
 *	sent to the write syscall. When dealing with strings, respect the file
 *	position but restrict it to the max length of the buffer; anything
 *	past the max length is ignored. Multiple writes will append
 *	to the buffer.
 *
 * These write modes control how the current file position affects the
 * behavior of updating sysctl values through the proc interface on each write.
 */
enum sysctl_writes_mode {
	SYSCTL_WRITES_LEGACY		= -1,
	SYSCTL_WRITES_WARN		= 0,
	SYSCTL_WRITES_STRICT		= 1,
};

/*
 * Currently selected write mode, initialised to SYSCTL_WRITES_STRICT.
 * It is consulted by _proc_do_string() and proc_first_pos_non_zero_ignore().
 */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
#endif /* CONFIG_PROC_SYSCTL */
126
/*
 * sysctl_legacy_va_layout is only defined when HAVE_ARCH_PICK_MMAP_LAYOUT or
 * CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT is defined. It is a
 * zero-initialised global with external linkage; its readers and writers are
 * not visible in this part of the file.
 */
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
int sysctl_legacy_va_layout;
#endif
131
#ifdef CONFIG_COMPACTION
/*
 * Upper bound for the external fragmentation threshold.
 * No separate minimum is defined here: min_extfrag_threshold is SYSCTL_ZERO.
 */
static const int max_extfrag_threshold = 1000;
#endif
136
137#endif /* CONFIG_SYSCTL */
138
139/*
140 * /proc/sys support
141 */
142
143#ifdef CONFIG_PROC_SYSCTL
144
145static int _proc_do_string(char *data, int maxlen, int write,
146		char *buffer, size_t *lenp, loff_t *ppos)
147{
148	size_t len;
149	char c, *p;
150
151	if (!data || !maxlen || !*lenp) {
152		*lenp = 0;
153		return 0;
154	}
155
156	if (write) {
157		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
158			/* Only continue writes not past the end of buffer. */
159			len = strlen(data);
160			if (len > maxlen - 1)
161				len = maxlen - 1;
162
163			if (*ppos > len)
164				return 0;
165			len = *ppos;
166		} else {
167			/* Start writing from beginning of buffer. */
168			len = 0;
169		}
170
171		*ppos += *lenp;
172		p = buffer;
173		while ((p - buffer) < *lenp && len < maxlen - 1) {
174			c = *(p++);
175			if (c == 0 || c == '\n')
176				break;
177			data[len++] = c;
178		}
179		data[len] = 0;
180	} else {
181		len = strlen(data);
182		if (len > maxlen)
183			len = maxlen;
184
185		if (*ppos > len) {
186			*lenp = 0;
187			return 0;
188		}
189
190		data += *ppos;
191		len  -= *ppos;
192
193		if (len > *lenp)
194			len = *lenp;
195		if (len)
196			memcpy(buffer, data, len);
197		if (len < *lenp) {
198			buffer[len] = '\n';
199			len++;
200		}
201		*lenp = len;
202		*ppos += len;
203	}
204	return 0;
205}
206
/*
 * warn_sysctl_write - warn about a sysctl write at a non-zero file position
 * @table: the sysctl table entry that was written; its procname is printed
 *
 * Uses pr_warn_once(), and names the writing task via current->comm.
 */
static void warn_sysctl_write(struct ctl_table *table)
{
	pr_warn_once("%s wrote to %s when file position was not 0!\n"
		"This will not be supported in the future. To silence this\n"
		"warning, set kernel.sysctl_writes_strict = -1\n",
		current->comm, table->procname);
}
214
215/**
216 * proc_first_pos_non_zero_ignore - check if first position is allowed
217 * @ppos: file position
218 * @table: the sysctl table
219 *
220 * Returns true if the first position is non-zero and the sysctl_writes_strict
221 * mode indicates this is not allowed for numeric input types. String proc
222 * handlers can ignore the return value.
223 */
224static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
225					   struct ctl_table *table)
226{
227	if (!*ppos)
228		return false;
229
230	switch (sysctl_writes_strict) {
231	case SYSCTL_WRITES_STRICT:
232		return true;
233	case SYSCTL_WRITES_WARN:
234		warn_sysctl_write(table);
235		return false;
236	default:
237		return false;
238	}
239}
240
241/**
242 * proc_dostring - read a string sysctl
243 * @table: the sysctl table
244 * @write: %TRUE if this is a write to the sysctl file
245 * @buffer: the user buffer
246 * @lenp: the size of the user buffer
247 * @ppos: file position
248 *
249 * Reads/writes a string from/to the user buffer. If the kernel
250 * buffer provided is not large enough to hold the string, the
251 * string is truncated. The copied string is %NULL-terminated.
252 * If the string is being read by the user process, it is copied
253 * and a newline '\n' is added. It is truncated if the buffer is
254 * not large enough.
255 *
256 * Returns 0 on success.
257 */
258int proc_dostring(struct ctl_table *table, int write,
259		  void *buffer, size_t *lenp, loff_t *ppos)
260{
261	if (write)
262		proc_first_pos_non_zero_ignore(ppos, table);
263
264	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
265			ppos);
266}
267
/*
 * proc_skip_spaces - advance *@buf past leading white space
 * @buf: in/out cursor into the buffer
 *
 * Returns the number of characters skipped. No length limit is passed to
 * skip_spaces(), so callers must make sure the scan terminates inside the
 * buffer and must not assume the result is bounded by their own byte count.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
276
/*
 * proc_skip_char - skip a run of one specific character
 * @buf: in/out cursor into the buffer
 * @size: in/out number of bytes remaining at *@buf
 * @v: the character to skip
 *
 * Advances *@buf and decrements *@size for every leading byte equal to @v,
 * never consuming more than *@size bytes.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t remaining = *size;

	while (remaining && *cur == v) {
		cur++;
		remaining--;
	}

	*buf = cur;
	*size = remaining;
}
286
287/**
288 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only
289 *                   fail on overflow
290 *
291 * @cp: kernel buffer containing the string to parse
292 * @endp: pointer to store the trailing characters
293 * @base: the base to use
294 * @res: where the parsed integer will be stored
295 *
296 * In case of success 0 is returned and @res will contain the parsed integer,
297 * @endp will hold any trailing characters.
298 * This function will fail the parse on overflow. If there wasn't an overflow
299 * the function will defer the decision what characters count as invalid to the
300 * caller.
301 */
302static int strtoul_lenient(const char *cp, char **endp, unsigned int base,
303			   unsigned long *res)
304{
305	unsigned long long result;
306	unsigned int rv;
307
308	cp = _parse_integer_fixup_radix(cp, &base);
309	rv = _parse_integer(cp, base, &result);
310	if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result))
311		return -ERANGE;
312
313	cp += rv;
314
315	if (endp)
316		*endp = (char *)cp;
317
318	*res = (unsigned long)result;
319	return 0;
320}
321
/*
 * Scratch buffer size for one number: a sign, the 20 decimal digits of a
 * 64-bit unsigned long, and the terminating NUL.
 */
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a user buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * Returns -EINVAL if @size is zero, if the input does not start with a
 * digit (after an optional '-'), if the number overflows or fills the whole
 * scratch buffer, or if it is followed by a character not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	size_t len;
	char *p, tmp[TMPBUFLEN];

	if (!*size)
		return -EINVAL;

	/*
	 * Clamp while still in size_t. Narrowing *size to an int first would
	 * truncate large values, possibly to a negative length that slips
	 * past the clamp and reaches memcpy() as a huge count.
	 */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else
		*neg = false;
	if (!isdigit(*p))
		return -EINVAL;

	if (strtoul_lenient(p, &p, 0, val))
		return -EINVAL;

	/* Number of bytes consumed, including the sign if there was one. */
	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
387
388/**
389 * proc_put_long - converts an integer to a decimal ASCII formatted string
390 *
391 * @buf: the user buffer
392 * @size: the size of the user buffer
393 * @val: the integer to be converted
394 * @neg: sign of the number, %TRUE for negative
395 *
396 * In case of success @buf and @size are updated with the amount of bytes
397 * written.
398 */
399static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
400{
401	int len;
402	char tmp[TMPBUFLEN], *p = tmp;
403
404	sprintf(p, "%s%lu", neg ? "-" : "", val);
405	len = strlen(tmp);
406	if (len > *size)
407		len = *size;
408	memcpy(*buf, tmp, len);
409	*size -= len;
410	*buf += len;
411}
412#undef TMPBUFLEN
413
/*
 * proc_put_char - append one character to the output buffer
 * @buf: in/out cursor into the output buffer
 * @size: in/out number of bytes still free at *@buf
 * @c: the character to store
 *
 * Does nothing when no space is left; otherwise stores @c, advances the
 * cursor by one and decrements the remaining size.
 */
static void proc_put_char(void **buf, size_t *size, char c)
{
	char *dst;

	if (!*size)
		return;

	dst = *buf;
	*dst = c;

	--*size;
	*buf = dst + 1;
}
425
/*
 * do_proc_dobool_conv - conversion callback for bool-backed sysctls
 * @negp: sign flag; cleared on reads, not consulted on writes
 * @lvalp: the parsed value (write) or the value to print (read)
 * @valp: the storage, accessed as a bool despite the int pointer type
 * @write: non-zero when storing *@lvalp, zero when loading into *@lvalp
 * @data: unused
 *
 * On a write any non-zero *@lvalp is stored as true. Always returns 0.
 */
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
				int *valp,
				int write, void *data)
{
	bool *flag = (bool *)valp;

	if (write) {
		*flag = *lvalp;
		return 0;
	}

	*lvalp = (unsigned long)(int)*flag;
	*negp = false;
	return 0;
}
440
/*
 * do_proc_dointvec_conv - default conversion callback for int sysctls
 * @negp: sign of the value (input on write, output on read)
 * @lvalp: magnitude of the value (input on write, output on read)
 * @valp: the int storage
 * @write: non-zero to store sign/magnitude into *@valp, zero to load them
 * @data: unused
 *
 * Returns 0 on success, or -EINVAL on a write whose magnitude does not fit
 * in an int (more than INT_MAX, or more than INT_MAX + 1 when negative).
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	int cur;

	if (write) {
		/* Negative values may reach one further than positive ones. */
		unsigned long limit = (unsigned long) INT_MAX + (*negp ? 1 : 0);

		if (*lvalp > limit)
			return -EINVAL;
		WRITE_ONCE(*valp, *negp ? -*lvalp : *lvalp);
		return 0;
	}

	cur = READ_ONCE(*valp);
	if (cur < 0) {
		*negp = true;
		*lvalp = -(unsigned long)cur;
		return 0;
	}

	*negp = false;
	*lvalp = (unsigned long)cur;
	return 0;
}
467
/*
 * do_proc_douintvec_conv - default conversion callback for unsigned int sysctls
 * @lvalp: the parsed value (write) or the value to print (read)
 * @valp: the unsigned int storage
 * @write: non-zero to store *@lvalp into *@valp, zero to load it
 * @data: unused
 *
 * Returns 0 on success, or -EINVAL on a write of a value above UINT_MAX.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		unsigned int cur = READ_ONCE(*valp);

		*lvalp = (unsigned long)cur;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	WRITE_ONCE(*valp, *lvalp);
	return 0;
}
482
483static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
484
485static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
486		  int write, void *buffer,
487		  size_t *lenp, loff_t *ppos,
488		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
489			      int write, void *data),
490		  void *data)
491{
492	int *i, vleft, first = 1, err = 0;
493	size_t left;
494	char *p;
495
496	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
497		*lenp = 0;
498		return 0;
499	}
500
501	i = (int *) tbl_data;
502	vleft = table->maxlen / sizeof(*i);
503	left = *lenp;
504
505	if (!conv)
506		conv = do_proc_dointvec_conv;
507
508	if (write) {
509		if (proc_first_pos_non_zero_ignore(ppos, table))
510			goto out;
511
512		if (left > PAGE_SIZE - 1)
513			left = PAGE_SIZE - 1;
514		p = buffer;
515	}
516
517	for (; left && vleft--; i++, first=0) {
518		unsigned long lval;
519		bool neg;
520
521		if (write) {
522			left -= proc_skip_spaces(&p);
523
524			if (!left)
525				break;
526			err = proc_get_long(&p, &left, &lval, &neg,
527					     proc_wspace_sep,
528					     sizeof(proc_wspace_sep), NULL);
529			if (err)
530				break;
531			if (conv(&neg, &lval, i, 1, data)) {
532				err = -EINVAL;
533				break;
534			}
535		} else {
536			if (conv(&neg, &lval, i, 0, data)) {
537				err = -EINVAL;
538				break;
539			}
540			if (!first)
541				proc_put_char(&buffer, &left, '\t');
542			proc_put_long(&buffer, &left, lval, neg);
543		}
544	}
545
546	if (!write && !first && left && !err)
547		proc_put_char(&buffer, &left, '\n');
548	if (write && !err && left)
549		left -= proc_skip_spaces(&p);
550	if (write && first)
551		return err ? : -EINVAL;
552	*lenp -= left;
553out:
554	*ppos += *lenp;
555	return err;
556}
557
558static int do_proc_dointvec(struct ctl_table *table, int write,
559		  void *buffer, size_t *lenp, loff_t *ppos,
560		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
561			      int write, void *data),
562		  void *data)
563{
564	return __do_proc_dointvec(table->data, table, write,
565			buffer, lenp, ppos, conv, data);
566}
567
568static int do_proc_douintvec_w(unsigned int *tbl_data,
569			       struct ctl_table *table,
570			       void *buffer,
571			       size_t *lenp, loff_t *ppos,
572			       int (*conv)(unsigned long *lvalp,
573					   unsigned int *valp,
574					   int write, void *data),
575			       void *data)
576{
577	unsigned long lval;
578	int err = 0;
579	size_t left;
580	bool neg;
581	char *p = buffer;
582
583	left = *lenp;
584
585	if (proc_first_pos_non_zero_ignore(ppos, table))
586		goto bail_early;
587
588	if (left > PAGE_SIZE - 1)
589		left = PAGE_SIZE - 1;
590
591	left -= proc_skip_spaces(&p);
592	if (!left) {
593		err = -EINVAL;
594		goto out_free;
595	}
596
597	err = proc_get_long(&p, &left, &lval, &neg,
598			     proc_wspace_sep,
599			     sizeof(proc_wspace_sep), NULL);
600	if (err || neg) {
601		err = -EINVAL;
602		goto out_free;
603	}
604
605	if (conv(&lval, tbl_data, 1, data)) {
606		err = -EINVAL;
607		goto out_free;
608	}
609
610	if (!err && left)
611		left -= proc_skip_spaces(&p);
612
613out_free:
614	if (err)
615		return -EINVAL;
616
617	return 0;
618
619	/* This is in keeping with old __do_proc_dointvec() */
620bail_early:
621	*ppos += *lenp;
622	return err;
623}
624
625static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
626			       size_t *lenp, loff_t *ppos,
627			       int (*conv)(unsigned long *lvalp,
628					   unsigned int *valp,
629					   int write, void *data),
630			       void *data)
631{
632	unsigned long lval;
633	int err = 0;
634	size_t left;
635
636	left = *lenp;
637
638	if (conv(&lval, tbl_data, 0, data)) {
639		err = -EINVAL;
640		goto out;
641	}
642
643	proc_put_long(&buffer, &left, lval, false);
644	if (!left)
645		goto out;
646
647	proc_put_char(&buffer, &left, '\n');
648
649out:
650	*lenp -= left;
651	*ppos += *lenp;
652
653	return err;
654}
655
656static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
657			       int write, void *buffer,
658			       size_t *lenp, loff_t *ppos,
659			       int (*conv)(unsigned long *lvalp,
660					   unsigned int *valp,
661					   int write, void *data),
662			       void *data)
663{
664	unsigned int *i, vleft;
665
666	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
667		*lenp = 0;
668		return 0;
669	}
670
671	i = (unsigned int *) tbl_data;
672	vleft = table->maxlen / sizeof(*i);
673
674	/*
675	 * Arrays are not supported, keep this simple. *Do not* add
676	 * support for them.
677	 */
678	if (vleft != 1) {
679		*lenp = 0;
680		return -EINVAL;
681	}
682
683	if (!conv)
684		conv = do_proc_douintvec_conv;
685
686	if (write)
687		return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
688					   conv, data);
689	return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
690}
691
692int do_proc_douintvec(struct ctl_table *table, int write,
693		      void *buffer, size_t *lenp, loff_t *ppos,
694		      int (*conv)(unsigned long *lvalp,
695				  unsigned int *valp,
696				  int write, void *data),
697		      void *data)
698{
699	return __do_proc_douintvec(table->data, table, write,
700				   buffer, lenp, ppos, conv, data);
701}
702
703/**
704 * proc_dobool - read/write a bool
705 * @table: the sysctl table
706 * @write: %TRUE if this is a write to the sysctl file
707 * @buffer: the user buffer
708 * @lenp: the size of the user buffer
709 * @ppos: file position
710 *
711 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
712 * values from/to the user buffer, treated as an ASCII string.
713 *
714 * Returns 0 on success.
715 */
716int proc_dobool(struct ctl_table *table, int write, void *buffer,
717		size_t *lenp, loff_t *ppos)
718{
719	return do_proc_dointvec(table, write, buffer, lenp, ppos,
720				do_proc_dobool_conv, NULL);
721}
722
723/**
724 * proc_dointvec - read a vector of integers
725 * @table: the sysctl table
726 * @write: %TRUE if this is a write to the sysctl file
727 * @buffer: the user buffer
728 * @lenp: the size of the user buffer
729 * @ppos: file position
730 *
731 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
732 * values from/to the user buffer, treated as an ASCII string.
733 *
734 * Returns 0 on success.
735 */
736int proc_dointvec(struct ctl_table *table, int write, void *buffer,
737		  size_t *lenp, loff_t *ppos)
738{
739	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
740}
741
#ifdef CONFIG_COMPACTION
/*
 * proc_dointvec_minmax() variant that, on CONFIG_PREEMPT_RT kernels, logs a
 * one-time warning when a successful write changed the stored value. In all
 * other cases it behaves exactly like proc_dointvec_minmax().
 */
static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	const bool watch = IS_ENABLED(CONFIG_PREEMPT_RT) && write;
	int before = 0;
	int ret;

	/* Snapshot the value so a change can be detected afterwards. */
	if (watch)
		before = *(int *)table->data;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (watch && !ret && before != *(int *)table->data)
		pr_warn_once("sysctl attribute %s changed by %s[%d]\n",
			     table->procname, current->comm,
			     task_pid_nr(current));
	return ret;
}
#endif
762
763/**
764 * proc_douintvec - read a vector of unsigned integers
765 * @table: the sysctl table
766 * @write: %TRUE if this is a write to the sysctl file
767 * @buffer: the user buffer
768 * @lenp: the size of the user buffer
769 * @ppos: file position
770 *
771 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
772 * values from/to the user buffer, treated as an ASCII string.
773 *
774 * Returns 0 on success.
775 */
776int proc_douintvec(struct ctl_table *table, int write, void *buffer,
777		size_t *lenp, loff_t *ppos)
778{
779	return do_proc_douintvec(table, write, buffer, lenp, ppos,
780				 do_proc_douintvec_conv, NULL);
781}
782
783/*
784 * Taint values can only be increased
785 * This means we can safely use a temporary.
786 */
787static int proc_taint(struct ctl_table *table, int write,
788			       void *buffer, size_t *lenp, loff_t *ppos)
789{
790	struct ctl_table t;
791	unsigned long tmptaint = get_taint();
792	int err;
793
794	if (write && !capable(CAP_SYS_ADMIN))
795		return -EPERM;
796
797	t = *table;
798	t.data = &tmptaint;
799	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
800	if (err < 0)
801		return err;
802
803	if (write) {
804		int i;
805
806		/*
807		 * If we are relying on panic_on_taint not producing
808		 * false positives due to userspace input, bail out
809		 * before setting the requested taint flags.
810		 */
811		if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint))
812			return -EINVAL;
813
814		/*
815		 * Poor man's atomic or. Not worth adding a primitive
816		 * to everyone's atomic.h for this
817		 */
818		for (i = 0; i < TAINT_FLAGS_COUNT; i++)
819			if ((1UL << i) & tmptaint)
820				add_taint(i, LOCKDEP_STILL_OK);
821	}
822
823	return err;
824}
825
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler. The bounds are
 * inclusive and are only applied to writes, see
 * do_proc_dointvec_minmax_conv().
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};
839
840static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
841					int *valp,
842					int write, void *data)
843{
844	int tmp, ret;
845	struct do_proc_dointvec_minmax_conv_param *param = data;
846	/*
847	 * If writing, first do so via a temporary local int so we can
848	 * bounds-check it before touching *valp.
849	 */
850	int *ip = write ? &tmp : valp;
851
852	ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data);
853	if (ret)
854		return ret;
855
856	if (write) {
857		if ((param->min && *param->min > tmp) ||
858		    (param->max && *param->max < tmp))
859			return -EINVAL;
860		WRITE_ONCE(*valp, tmp);
861	}
862
863	return 0;
864}
865
866/**
867 * proc_dointvec_minmax - read a vector of integers with min/max values
868 * @table: the sysctl table
869 * @write: %TRUE if this is a write to the sysctl file
870 * @buffer: the user buffer
871 * @lenp: the size of the user buffer
872 * @ppos: file position
873 *
874 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
875 * values from/to the user buffer, treated as an ASCII string.
876 *
877 * This routine will ensure the values are within the range specified by
878 * table->extra1 (min) and table->extra2 (max).
879 *
880 * Returns 0 on success or -EINVAL on write when the range check fails.
881 */
882int proc_dointvec_minmax(struct ctl_table *table, int write,
883		  void *buffer, size_t *lenp, loff_t *ppos)
884{
885	struct do_proc_dointvec_minmax_conv_param param = {
886		.min = (int *) table->extra1,
887		.max = (int *) table->extra2,
888	};
889	return do_proc_dointvec(table, write, buffer, lenp, ppos,
890				do_proc_dointvec_minmax_conv, &param);
891}
892
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value, or NULL for no lower bound
 * @max: pointer to maximum allowable value, or NULL for no upper bound
 *
 * The do_proc_douintvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_douintvec_minmax() handler. The bounds are
 * inclusive and are only applied to writes, see
 * do_proc_douintvec_minmax_conv().
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};
906
907static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
908					 unsigned int *valp,
909					 int write, void *data)
910{
911	int ret;
912	unsigned int tmp;
913	struct do_proc_douintvec_minmax_conv_param *param = data;
914	/* write via temporary local uint for bounds-checking */
915	unsigned int *up = write ? &tmp : valp;
916
917	ret = do_proc_douintvec_conv(lvalp, up, write, data);
918	if (ret)
919		return ret;
920
921	if (write) {
922		if ((param->min && *param->min > tmp) ||
923		    (param->max && *param->max < tmp))
924			return -ERANGE;
925
926		WRITE_ONCE(*valp, tmp);
927	}
928
929	return 0;
930}
931
932/**
933 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
934 * @table: the sysctl table
935 * @write: %TRUE if this is a write to the sysctl file
936 * @buffer: the user buffer
937 * @lenp: the size of the user buffer
938 * @ppos: file position
939 *
940 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
941 * values from/to the user buffer, treated as an ASCII string. Negative
942 * strings are not allowed.
943 *
944 * This routine will ensure the values are within the range specified by
945 * table->extra1 (min) and table->extra2 (max). There is a final sanity
946 * check for UINT_MAX to avoid having to support wrap around uses from
947 * userspace.
948 *
949 * Returns 0 on success or -ERANGE on write when the range check fails.
950 */
951int proc_douintvec_minmax(struct ctl_table *table, int write,
952			  void *buffer, size_t *lenp, loff_t *ppos)
953{
954	struct do_proc_douintvec_minmax_conv_param param = {
955		.min = (unsigned int *) table->extra1,
956		.max = (unsigned int *) table->extra2,
957	};
958	return do_proc_douintvec(table, write, buffer, lenp, ppos,
959				 do_proc_douintvec_minmax_conv, &param);
960}
961
962/**
963 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values
964 * @table: the sysctl table
965 * @write: %TRUE if this is a write to the sysctl file
966 * @buffer: the user buffer
967 * @lenp: the size of the user buffer
968 * @ppos: file position
969 *
970 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars
971 * values from/to the user buffer, treated as an ASCII string. Negative
972 * strings are not allowed.
973 *
974 * This routine will ensure the values are within the range specified by
975 * table->extra1 (min) and table->extra2 (max).
976 *
977 * Returns 0 on success or an error on write when the range check fails.
978 */
979int proc_dou8vec_minmax(struct ctl_table *table, int write,
980			void *buffer, size_t *lenp, loff_t *ppos)
981{
982	struct ctl_table tmp;
983	unsigned int min = 0, max = 255U, val;
984	u8 *data = table->data;
985	struct do_proc_douintvec_minmax_conv_param param = {
986		.min = &min,
987		.max = &max,
988	};
989	int res;
990
991	/* Do not support arrays yet. */
992	if (table->maxlen != sizeof(u8))
993		return -EINVAL;
994
995	if (table->extra1) {
996		min = *(unsigned int *) table->extra1;
997		if (min > 255U)
998			return -EINVAL;
999	}
1000	if (table->extra2) {
1001		max = *(unsigned int *) table->extra2;
1002		if (max > 255U)
1003			return -EINVAL;
1004	}
1005
1006	tmp = *table;
1007
1008	tmp.maxlen = sizeof(val);
1009	tmp.data = &val;
1010	val = READ_ONCE(*data);
1011	res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
1012				do_proc_douintvec_minmax_conv, &param);
1013	if (res)
1014		return res;
1015	if (write)
1016		WRITE_ONCE(*data, val);
1017	return 0;
1018}
1019EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
1020
#ifdef CONFIG_MAGIC_SYSRQ
/*
 * sysrq_sysctl_handler - sysctl handler for the SysRq mask
 *
 * The mask is not kept in table->data: it is fetched with sysrq_mask() into
 * a local, run through the int vector parser/printer, and a successfully
 * written value is applied with sysrq_toggle_support().
 */
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int mask = sysrq_mask();
	int ret;

	ret = __do_proc_dointvec(&mask, table, write, buffer,
				 lenp, ppos, NULL, NULL);
	if (!ret && write)
		sysrq_toggle_support(mask);

	return ret;
}
#endif
1040
1041static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
1042		int write, void *buffer, size_t *lenp, loff_t *ppos,
1043		unsigned long convmul, unsigned long convdiv)
1044{
1045	unsigned long *i, *min, *max;
1046	int vleft, first = 1, err = 0;
1047	size_t left;
1048	char *p;
1049
1050	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1051		*lenp = 0;
1052		return 0;
1053	}
1054
1055	i = (unsigned long *) data;
1056	min = (unsigned long *) table->extra1;
1057	max = (unsigned long *) table->extra2;
1058	vleft = table->maxlen / sizeof(unsigned long);
1059	left = *lenp;
1060
1061	if (write) {
1062		if (proc_first_pos_non_zero_ignore(ppos, table))
1063			goto out;
1064
1065		if (left > PAGE_SIZE - 1)
1066			left = PAGE_SIZE - 1;
1067		p = buffer;
1068	}
1069
1070	for (; left && vleft--; i++, first = 0) {
1071		unsigned long val;
1072
1073		if (write) {
1074			bool neg;
1075
1076			left -= proc_skip_spaces(&p);
1077			if (!left)
1078				break;
1079
1080			err = proc_get_long(&p, &left, &val, &neg,
1081					     proc_wspace_sep,
1082					     sizeof(proc_wspace_sep), NULL);
1083			if (err || neg) {
1084				err = -EINVAL;
1085				break;
1086			}
1087
1088			val = convmul * val / convdiv;
1089			if ((min && val < *min) || (max && val > *max)) {
1090				err = -EINVAL;
1091				break;
1092			}
1093			WRITE_ONCE(*i, val);
1094		} else {
1095			val = convdiv * READ_ONCE(*i) / convmul;
1096			if (!first)
1097				proc_put_char(&buffer, &left, '\t');
1098			proc_put_long(&buffer, &left, val, false);
1099		}
1100	}
1101
1102	if (!write && !first && left && !err)
1103		proc_put_char(&buffer, &left, '\n');
1104	if (write && !err)
1105		left -= proc_skip_spaces(&p);
1106	if (write && first)
1107		return err ? : -EINVAL;
1108	*lenp -= left;
1109out:
1110	*ppos += *lenp;
1111	return err;
1112}
1113
1114static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
1115		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
1116		unsigned long convdiv)
1117{
1118	return __do_proc_doulongvec_minmax(table->data, table, write,
1119			buffer, lenp, ppos, convmul, convdiv);
1120}
1121
1122/**
1123 * proc_doulongvec_minmax - read a vector of long integers with min/max values
1124 * @table: the sysctl table
1125 * @write: %TRUE if this is a write to the sysctl file
1126 * @buffer: the user buffer
1127 * @lenp: the size of the user buffer
1128 * @ppos: file position
1129 *
1130 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1131 * values from/to the user buffer, treated as an ASCII string.
1132 *
1133 * This routine will ensure the values are within the range specified by
1134 * table->extra1 (min) and table->extra2 (max).
1135 *
1136 * Returns 0 on success.
1137 */
1138int proc_doulongvec_minmax(struct ctl_table *table, int write,
1139			   void *buffer, size_t *lenp, loff_t *ppos)
1140{
1141    return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1142}
1143
1144/**
1145 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1146 * @table: the sysctl table
1147 * @write: %TRUE if this is a write to the sysctl file
1148 * @buffer: the user buffer
1149 * @lenp: the size of the user buffer
1150 * @ppos: file position
1151 *
1152 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1153 * values from/to the user buffer, treated as an ASCII string. The values
1154 * are treated as milliseconds, and converted to jiffies when they are stored.
1155 *
1156 * This routine will ensure the values are within the range specified by
1157 * table->extra1 (min) and table->extra2 (max).
1158 *
1159 * Returns 0 on success.
1160 */
1161int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1162				      void *buffer, size_t *lenp, loff_t *ppos)
1163{
1164    return do_proc_doulongvec_minmax(table, write, buffer,
1165				     lenp, ppos, HZ, 1000l);
1166}
1167
1168
1169static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1170					 int *valp,
1171					 int write, void *data)
1172{
1173	if (write) {
1174		if (*lvalp > INT_MAX / HZ)
1175			return 1;
1176		if (*negp)
1177			WRITE_ONCE(*valp, -*lvalp * HZ);
1178		else
1179			WRITE_ONCE(*valp, *lvalp * HZ);
1180	} else {
1181		int val = READ_ONCE(*valp);
1182		unsigned long lval;
1183		if (val < 0) {
1184			*negp = true;
1185			lval = -(unsigned long)val;
1186		} else {
1187			*negp = false;
1188			lval = (unsigned long)val;
1189		}
1190		*lvalp = lval / HZ;
1191	}
1192	return 0;
1193}
1194
1195static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1196						int *valp,
1197						int write, void *data)
1198{
1199	if (write) {
1200		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1201			return 1;
1202		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1203	} else {
1204		int val = *valp;
1205		unsigned long lval;
1206		if (val < 0) {
1207			*negp = true;
1208			lval = -(unsigned long)val;
1209		} else {
1210			*negp = false;
1211			lval = (unsigned long)val;
1212		}
1213		*lvalp = jiffies_to_clock_t(lval);
1214	}
1215	return 0;
1216}
1217
/*
 * Converter between a user-visible number of milliseconds and the jiffies
 * count held in *valp.
 *
 * Write: the signed input (*negp, *lvalp) goes through msecs_to_jiffies();
 * a result above INT_MAX is refused, otherwise it is stored in *valp.
 * Read: *valp is split into a sign (*negp) and a magnitude, and the
 * magnitude is reported through *lvalp after jiffies_to_msecs().
 *
 * Returns 0 on success, 1 if the written value does not fit in an int.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int stored;

	if (write) {
		unsigned long ticks;

		ticks = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		if (ticks > INT_MAX)
			return 1;

		WRITE_ONCE(*valp, (int)ticks);
		return 0;
	}

	stored = READ_ONCE(*valp);
	*negp = stored < 0;
	/* Negate in unsigned arithmetic so INT_MIN is handled without overflow. */
	magnitude = stored < 0 ? -(unsigned long)stored : (unsigned long)stored;
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
1242
1243static int do_proc_dointvec_ms_jiffies_minmax_conv(bool *negp, unsigned long *lvalp,
1244						int *valp, int write, void *data)
1245{
1246	int tmp, ret;
1247	struct do_proc_dointvec_minmax_conv_param *param = data;
1248	/*
1249	 * If writing, first do so via a temporary local int so we can
1250	 * bounds-check it before touching *valp.
1251	 */
1252	int *ip = write ? &tmp : valp;
1253
1254	ret = do_proc_dointvec_ms_jiffies_conv(negp, lvalp, ip, write, data);
1255	if (ret)
1256		return ret;
1257
1258	if (write) {
1259		if ((param->min && *param->min > tmp) ||
1260				(param->max && *param->max < tmp))
1261			return -EINVAL;
1262		*valp = tmp;
1263	}
1264	return 0;
1265}
1266
1267/**
1268 * proc_dointvec_jiffies - read a vector of integers as seconds
1269 * @table: the sysctl table
1270 * @write: %TRUE if this is a write to the sysctl file
1271 * @buffer: the user buffer
1272 * @lenp: the size of the user buffer
1273 * @ppos: file position
1274 *
1275 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1276 * values from/to the user buffer, treated as an ASCII string.
1277 * The values read are assumed to be in seconds, and are converted into
1278 * jiffies.
1279 *
1280 * Returns 0 on success.
1281 */
1282int proc_dointvec_jiffies(struct ctl_table *table, int write,
1283			  void *buffer, size_t *lenp, loff_t *ppos)
1284{
1285    return do_proc_dointvec(table,write,buffer,lenp,ppos,
1286		    	    do_proc_dointvec_jiffies_conv,NULL);
1287}
1288
1289int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1290			  void *buffer, size_t *lenp, loff_t *ppos)
1291{
1292	struct do_proc_dointvec_minmax_conv_param param = {
1293		.min = (int *) table->extra1,
1294		.max = (int *) table->extra2,
1295	};
1296	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1297			do_proc_dointvec_ms_jiffies_minmax_conv, &param);
1298}
1299
1300/**
1301 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
1302 * @table: the sysctl table
1303 * @write: %TRUE if this is a write to the sysctl file
1304 * @buffer: the user buffer
1305 * @lenp: the size of the user buffer
1306 * @ppos: pointer to the file position
1307 *
1308 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1309 * values from/to the user buffer, treated as an ASCII string.
1310 * The values read are assumed to be in 1/USER_HZ seconds, and
1311 * are converted into jiffies.
1312 *
1313 * Returns 0 on success.
1314 */
1315int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1316				 void *buffer, size_t *lenp, loff_t *ppos)
1317{
1318	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1319				do_proc_dointvec_userhz_jiffies_conv, NULL);
1320}
1321
1322/**
1323 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
1324 * @table: the sysctl table
1325 * @write: %TRUE if this is a write to the sysctl file
1326 * @buffer: the user buffer
1327 * @lenp: the size of the user buffer
1328 * @ppos: file position
1329 * @ppos: the current position in the file
1330 *
1331 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1332 * values from/to the user buffer, treated as an ASCII string.
1333 * The values read are assumed to be in 1/1000 seconds, and
1334 * are converted into jiffies.
1335 *
1336 * Returns 0 on success.
1337 */
1338int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
1339		size_t *lenp, loff_t *ppos)
1340{
1341	return do_proc_dointvec(table, write, buffer, lenp, ppos,
1342				do_proc_dointvec_ms_jiffies_conv, NULL);
1343}
1344
1345static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
1346		size_t *lenp, loff_t *ppos)
1347{
1348	struct pid *new_pid;
1349	pid_t tmp;
1350	int r;
1351
1352	tmp = pid_vnr(cad_pid);
1353
1354	r = __do_proc_dointvec(&tmp, table, write, buffer,
1355			       lenp, ppos, NULL, NULL);
1356	if (r || !write)
1357		return r;
1358
1359	new_pid = find_get_pid(tmp);
1360	if (!new_pid)
1361		return -ESRCH;
1362
1363	put_pid(xchg(&cad_pid, new_pid));
1364	return 0;
1365}
1366
/**
 * proc_do_large_bitmap - read/write from/to a large bitmap
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * The bitmap is stored at table->data and the bitmap length (in bits)
 * in table->maxlen.
 *
 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
 * large bitmaps may be represented in a compact manner. Writing into
 * the file will clear the bitmap then update it with the given input.
 *
 * Writes are parsed into a temporary bitmap first, so the live bitmap is
 * only modified once the whole chunk has been parsed without error. A write
 * at a non-zero file position is OR-ed into the existing bitmap instead of
 * replacing it. At most PAGE_SIZE - 1 bytes are parsed per call; the rest is
 * reported back as unconsumed through @lenp.
 *
 * Returns 0 on success, -ENOMEM if the temporary bitmap cannot be allocated,
 * -EINVAL for a negative or out-of-range bit number or a range whose start
 * exceeds its end, or the error returned by proc_get_long().
 */
int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err = 0;
	size_t left = *lenp;
	unsigned long bitmap_len = table->maxlen;
	/* table->data holds a pointer to the bitmap pointer, not the bitmap. */
	unsigned long *bitmap = *(unsigned long **) table->data;
	unsigned long *tmp_bitmap = NULL;
	/*
	 * Terminator sets handed to proc_get_long(): tr_a for the first number
	 * of an item (may be followed by '-'), tr_b for the end of a range.
	 */
	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;

	/* Nothing to transfer, or a read continuing past the first chunk. */
	if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		char *p = buffer;
		size_t skipped = 0;

		if (left > PAGE_SIZE - 1) {
			left = PAGE_SIZE - 1;
			/* How much of the buffer we'll skip this pass */
			skipped = *lenp - left;
		}

		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
		if (!tmp_bitmap)
			return -ENOMEM;
		proc_skip_char(&p, &left, '\n');
		while (!err && left) {
			unsigned long val_a, val_b;
			bool neg;
			size_t saved_left;

			/* In case we stop parsing mid-number, we can reset */
			saved_left = left;
			err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
					     sizeof(tr_a), &c);
			/*
			 * If we consumed the entirety of a truncated buffer or
			 * only one char is left (may be a "-"), then stop here,
			 * reset, & come back for more.
			 */
			if ((left <= 1) && skipped) {
				left = saved_left;
				break;
			}

			if (err)
				break;
			if (val_a >= bitmap_len || neg) {
				err = -EINVAL;
				break;
			}

			/* A lone number is the range val_a-val_a. */
			val_b = val_a;
			/*
			 * Step over one character; presumably the terminator
			 * that proc_get_long() reported in c - TODO confirm.
			 */
			if (left) {
				p++;
				left--;
			}

			if (c == '-') {
				err = proc_get_long(&p, &left, &val_b,
						     &neg, tr_b, sizeof(tr_b),
						     &c);
				/*
				 * If we consumed all of a truncated buffer,
				 * then stop here, reset, & come back for more.
				 */
				if (!left && skipped) {
					left = saved_left;
					break;
				}

				if (err)
					break;
				if (val_b >= bitmap_len || neg ||
				    val_a > val_b) {
					err = -EINVAL;
					break;
				}
				if (left) {
					p++;
					left--;
				}
			}

			/* Ranges are inclusive, hence the + 1 on the bit count. */
			bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
			proc_skip_char(&p, &left, '\n');
		}
		/* Bytes beyond the PAGE_SIZE - 1 cap count as not consumed. */
		left += skipped;
	} else {
		unsigned long bit_a, bit_b = 0;
		bool first = 1;

		/* Emit each run of set bits as "a" or "a-b", comma separated. */
		while (left) {
			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
			if (bit_a >= bitmap_len)
				break;
			/* Last set bit of the run: one before the next zero bit. */
			bit_b = find_next_zero_bit(bitmap, bitmap_len,
						   bit_a + 1) - 1;

			if (!first)
				proc_put_char(&buffer, &left, ',');
			proc_put_long(&buffer, &left, bit_a, false);
			if (bit_a != bit_b) {
				proc_put_char(&buffer, &left, '-');
				proc_put_long(&buffer, &left, bit_b, false);
			}

			/* Resume the search just past the run we printed. */
			first = 0; bit_b++;
		}
		proc_put_char(&buffer, &left, '\n');
	}

	if (!err) {
		if (write) {
			/*
			 * Non-zero position: merge into the existing bitmap;
			 * position zero: replace the bitmap.
			 */
			if (*ppos)
				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
			else
				bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
		}
		/* Report the bytes actually handled and advance the position. */
		*lenp -= left;
		*ppos += *lenp;
	}

	/* tmp_bitmap is still NULL on the read path. */
	bitmap_free(tmp_bitmap);
	return err;
}
1513
1514#else /* CONFIG_PROC_SYSCTL */
1515
1516int proc_dostring(struct ctl_table *table, int write,
1517		  void *buffer, size_t *lenp, loff_t *ppos)
1518{
1519	return -ENOSYS;
1520}
1521
1522int proc_dobool(struct ctl_table *table, int write,
1523		void *buffer, size_t *lenp, loff_t *ppos)
1524{
1525	return -ENOSYS;
1526}
1527
1528int proc_dointvec(struct ctl_table *table, int write,
1529		  void *buffer, size_t *lenp, loff_t *ppos)
1530{
1531	return -ENOSYS;
1532}
1533
1534int proc_douintvec(struct ctl_table *table, int write,
1535		  void *buffer, size_t *lenp, loff_t *ppos)
1536{
1537	return -ENOSYS;
1538}
1539
1540int proc_dointvec_minmax(struct ctl_table *table, int write,
1541		    void *buffer, size_t *lenp, loff_t *ppos)
1542{
1543	return -ENOSYS;
1544}
1545
1546int proc_douintvec_minmax(struct ctl_table *table, int write,
1547			  void *buffer, size_t *lenp, loff_t *ppos)
1548{
1549	return -ENOSYS;
1550}
1551
1552int proc_dou8vec_minmax(struct ctl_table *table, int write,
1553			void *buffer, size_t *lenp, loff_t *ppos)
1554{
1555	return -ENOSYS;
1556}
1557
1558int proc_dointvec_jiffies(struct ctl_table *table, int write,
1559		    void *buffer, size_t *lenp, loff_t *ppos)
1560{
1561	return -ENOSYS;
1562}
1563
1564int proc_dointvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1565				    void *buffer, size_t *lenp, loff_t *ppos)
1566{
1567	return -ENOSYS;
1568}
1569
1570int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1571		    void *buffer, size_t *lenp, loff_t *ppos)
1572{
1573	return -ENOSYS;
1574}
1575
1576int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1577			     void *buffer, size_t *lenp, loff_t *ppos)
1578{
1579	return -ENOSYS;
1580}
1581
1582int proc_doulongvec_minmax(struct ctl_table *table, int write,
1583		    void *buffer, size_t *lenp, loff_t *ppos)
1584{
1585	return -ENOSYS;
1586}
1587
1588int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1589				      void *buffer, size_t *lenp, loff_t *ppos)
1590{
1591	return -ENOSYS;
1592}
1593
1594int proc_do_large_bitmap(struct ctl_table *table, int write,
1595			 void *buffer, size_t *lenp, loff_t *ppos)
1596{
1597	return -ENOSYS;
1598}
1599
1600#endif /* CONFIG_PROC_SYSCTL */
1601
1602#if defined(CONFIG_SYSCTL)
1603int proc_do_static_key(struct ctl_table *table, int write,
1604		       void *buffer, size_t *lenp, loff_t *ppos)
1605{
1606	struct static_key *key = (struct static_key *)table->data;
1607	static DEFINE_MUTEX(static_key_mutex);
1608	int val, ret;
1609	struct ctl_table tmp = {
1610		.data   = &val,
1611		.maxlen = sizeof(val),
1612		.mode   = table->mode,
1613		.extra1 = SYSCTL_ZERO,
1614		.extra2 = SYSCTL_ONE,
1615	};
1616
1617	if (write && !capable(CAP_SYS_ADMIN))
1618		return -EPERM;
1619
1620	mutex_lock(&static_key_mutex);
1621	val = static_key_enabled(key);
1622	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1623	if (write && !ret) {
1624		if (val)
1625			static_key_enable(key);
1626		else
1627			static_key_disable(key);
1628	}
1629	mutex_unlock(&static_key_mutex);
1630	return ret;
1631}
1632
/*
 * Table of sysctl entries. Each element describes one file: its name
 * (.procname), the variable it exposes (.data) together with that variable's
 * size (.maxlen), the access mode bits (.mode) and the handler invoked on
 * read/write (.proc_handler). For the *_minmax handlers, .extra1 and .extra2
 * point at the lower and upper bound. Entries inside #if/#ifdef exist only
 * when the corresponding config option or architecture is enabled.
 *
 * NOTE(review): judging by the name these presumably appear under the
 * "kernel" sysctl directory; the registration call is not in this part of
 * the file - confirm there.
 */
static struct ctl_table kern_table[] = {
#ifdef CONFIG_NUMA_BALANCING
	{
		.procname	= "numa_balancing",
		.data		= NULL, /* filled in by handler */
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sysctl_numa_balancing,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_FOUR,
	},
#endif /* CONFIG_NUMA_BALANCING */
	{
		.procname	= "panic",
		.data		= &panic_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_PROC_SYSCTL
	{
		.procname	= "tainted",
		.maxlen 	= sizeof(long),
		.mode		= 0644,
		.proc_handler	= proc_taint,
	},
	{
		.procname	= "sysctl_writes_strict",
		.data		= &sysctl_writes_strict,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_NEG_ONE,
		.extra2		= SYSCTL_ONE,
	},
#endif
	{
		.procname	= "print-fatal-signals",
		.data		= &print_fatal_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_SPARC
	{
		.procname	= "reboot-cmd",
		.data		= reboot_command,
		.maxlen		= 256,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "stop-a",
		.data		= &stop_a_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "scons-poweroff",
		.data		= &scons_pwroff,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_SPARC64
	{
		.procname	= "tsb-ratio",
		.data		= &sysctl_tsb_ratio,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_PARISC
	{
		.procname	= "soft-power",
		.data		= &pwrsw_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
	{
		.procname	= "unaligned-trap",
		.data		= &unaligned_enabled,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_STACK_TRACER
	{
		.procname	= "stack_tracer_enabled",
		.data		= &stack_tracer_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= stack_trace_sysctl,
	},
#endif
#ifdef CONFIG_TRACING
	{
		.procname	= "ftrace_dump_on_oops",
		.data		= &ftrace_dump_on_oops,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "traceoff_on_warning",
		.data		= &__disable_trace_on_warning,
		.maxlen		= sizeof(__disable_trace_on_warning),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
#endif
#ifdef CONFIG_MODULES
	{
		.procname	= "modprobe",
		.data		= &modprobe_path,
		.maxlen		= KMOD_PATH_LEN,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
	{
		.procname	= "modules_disabled",
		.data		= &modules_disabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		/* only handle a transition from default "0" to "1" */
		.proc_handler	= proc_dointvec_minmax,
		/* min == max == 1, so 1 is the only value a write may store */
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
	},
#endif
#ifdef CONFIG_UEVENT_HELPER
	{
		.procname	= "hotplug",
		.data		= &uevent_helper,
		.maxlen		= UEVENT_HELPER_PATH_LEN,
		.mode		= 0644,
		.proc_handler	= proc_dostring,
	},
#endif
#ifdef CONFIG_MAGIC_SYSRQ
	{
		.procname	= "sysrq",
		.data		= NULL,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= sysrq_sysctl_handler,
	},
#endif
#ifdef CONFIG_PROC_SYSCTL
	{
		/* .data is unused: proc_do_cad_pid() works on cad_pid directly */
		.procname	= "cad_pid",
		.data		= NULL,
		.maxlen		= sizeof (int),
		.mode		= 0600,
		.proc_handler	= proc_do_cad_pid,
	},
#endif
	{
		.procname	= "threads-max",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= sysctl_max_threads,
	},
	{
		/* Sub-directory: .child points at a nested table, no handler */
		.procname	= "usermodehelper",
		.mode		= 0555,
		.child		= usermodehelper_table,
	},
	{
		.procname	= "overflowuid",
		.data		= &overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_MAXOLDUID,
	},
	{
		.procname	= "overflowgid",
		.data		= &overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_MAXOLDUID,
	},
#ifdef CONFIG_S390
	{
		.procname	= "userprocess_debug",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{
		.procname	= "pid_max",
		.data		= &pid_max,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &pid_max_min,
		.extra2		= &pid_max_max,
	},
	{
		.procname	= "panic_on_oops",
		.data		= &panic_on_oops,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "panic_print",
		.data		= &panic_print,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		/* Read-only (0444); the cast drops the qualifier on the variable */
		.procname	= "ngroups_max",
		.data		= (void *)&ngroups_max,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "cap_last_cap",
		.data		= (void *)&cap_last_cap,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
	{
		.procname       = "unknown_nmi_panic",
		.data           = &unknown_nmi_panic,
		.maxlen         = sizeof (int),
		.mode           = 0644,
		.proc_handler   = proc_dointvec,
	},
#endif

#if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \
	defined(CONFIG_DEBUG_STACKOVERFLOW)
	{
		.procname	= "panic_on_stackoverflow",
		.data		= &sysctl_panic_on_stackoverflow,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if defined(CONFIG_X86)
	{
		.procname	= "panic_on_unrecovered_nmi",
		.data		= &panic_on_unrecovered_nmi,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "panic_on_io_nmi",
		.data		= &panic_on_io_nmi,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "bootloader_type",
		.data		= &bootloader_type,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "bootloader_version",
		.data		= &bootloader_version,
		.maxlen		= sizeof (int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "io_delay_type",
		.data		= &io_delay_type,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if defined(CONFIG_MMU)
	{
		.procname	= "randomize_va_space",
		.data		= &randomize_va_space,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if defined(CONFIG_S390) && defined(CONFIG_SMP)
	{
		.procname	= "spin_retry",
		.data		= &spin_retry,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
	{
		.procname	= "acpi_video_flags",
		.data		= &acpi_realmode_flags,
		.maxlen		= sizeof (unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif
#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
	{
		.procname	= "ignore-unaligned-usertrap",
		.data		= &no_unaligned_warning,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_IA64
	{
		.procname	= "unaligned-dump-stack",
		.data		= &unaligned_dump_stack,
		.maxlen		= sizeof (int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_RT_MUTEXES
	{
		.procname	= "max_lock_depth",
		.data		= &max_lock_depth,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_KEYS
	{
		/* Sub-directory: .child points at a nested table, no handler */
		.procname	= "keys",
		.mode		= 0555,
		.child		= key_sysctls,
	},
#endif
#ifdef CONFIG_PERF_EVENTS
	/*
	 * User-space scripts rely on the existence of this file
	 * as a feature check for perf_events being enabled.
	 *
	 * So it's an ABI, do not remove!
	 */
	{
		.procname	= "perf_event_paranoid",
		.data		= &sysctl_perf_event_paranoid,
		.maxlen		= sizeof(sysctl_perf_event_paranoid),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "perf_event_mlock_kb",
		.data		= &sysctl_perf_event_mlock,
		.maxlen		= sizeof(sysctl_perf_event_mlock),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "perf_event_max_sample_rate",
		.data		= &sysctl_perf_event_sample_rate,
		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
		.mode		= 0644,
		.proc_handler	= perf_proc_update_handler,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "perf_cpu_time_max_percent",
		.data		= &sysctl_perf_cpu_time_max_percent,
		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
		.mode		= 0644,
		.proc_handler	= perf_cpu_time_max_percent_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_HUNDRED,
	},
	{
		.procname	= "perf_event_max_stack",
		.data		= &sysctl_perf_event_max_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&six_hundred_forty_kb,
	},
	{
		.procname	= "perf_event_max_contexts_per_stack",
		.data		= &sysctl_perf_event_max_contexts_per_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_THOUSAND,
	},
#endif
	{
		.procname	= "panic_on_warn",
		.data		= &panic_on_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#ifdef CONFIG_TREE_RCU
	{
		.procname	= "panic_on_rcu_stall",
		.data		= &sysctl_panic_on_rcu_stall,
		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "max_rcu_stall_to_panic",
		.data		= &sysctl_max_rcu_stall_to_panic,
		.maxlen		= sizeof(sysctl_max_rcu_stall_to_panic),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_INT_MAX,
	},
#endif
	/* Empty entry; presumably the end-of-table marker - TODO confirm */
	{ }
};
2086
2087static struct ctl_table vm_table[] = {
2088	{
2089		.procname	= "overcommit_memory",
2090		.data		= &sysctl_overcommit_memory,
2091		.maxlen		= sizeof(sysctl_overcommit_memory),
2092		.mode		= 0644,
2093		.proc_handler	= overcommit_policy_handler,
2094		.extra1		= SYSCTL_ZERO,
2095		.extra2		= SYSCTL_TWO,
2096	},
2097	{
2098		.procname	= "overcommit_ratio",
2099		.data		= &sysctl_overcommit_ratio,
2100		.maxlen		= sizeof(sysctl_overcommit_ratio),
2101		.mode		= 0644,
2102		.proc_handler	= overcommit_ratio_handler,
2103	},
2104	{
2105		.procname	= "overcommit_kbytes",
2106		.data		= &sysctl_overcommit_kbytes,
2107		.maxlen		= sizeof(sysctl_overcommit_kbytes),
2108		.mode		= 0644,
2109		.proc_handler	= overcommit_kbytes_handler,
2110	},
2111	{
2112		.procname	= "page-cluster",
2113		.data		= &page_cluster,
2114		.maxlen		= sizeof(int),
2115		.mode		= 0644,
2116		.proc_handler	= proc_dointvec_minmax,
2117		.extra1		= SYSCTL_ZERO,
2118	},
2119	{
2120		.procname	= "dirtytime_expire_seconds",
2121		.data		= &dirtytime_expire_interval,
2122		.maxlen		= sizeof(dirtytime_expire_interval),
2123		.mode		= 0644,
2124		.proc_handler	= dirtytime_interval_handler,
2125		.extra1		= SYSCTL_ZERO,
2126	},
2127	{
2128		.procname	= "swappiness",
2129		.data		= &vm_swappiness,
2130		.maxlen		= sizeof(vm_swappiness),
2131		.mode		= 0644,
2132		.proc_handler	= proc_dointvec_minmax,
2133		.extra1		= SYSCTL_ZERO,
2134		.extra2		= SYSCTL_TWO_HUNDRED,
2135	},
2136#ifdef CONFIG_NUMA
2137	{
2138		.procname	= "numa_stat",
2139		.data		= &sysctl_vm_numa_stat,
2140		.maxlen		= sizeof(int),
2141		.mode		= 0644,
2142		.proc_handler	= sysctl_vm_numa_stat_handler,
2143		.extra1		= SYSCTL_ZERO,
2144		.extra2		= SYSCTL_ONE,
2145	},
2146#endif
2147#ifdef CONFIG_HUGETLB_PAGE
2148	{
2149		.procname	= "nr_hugepages",
2150		.data		= NULL,
2151		.maxlen		= sizeof(unsigned long),
2152		.mode		= 0644,
2153		.proc_handler	= hugetlb_sysctl_handler,
2154	},
2155#ifdef CONFIG_NUMA
2156	{
2157		.procname       = "nr_hugepages_mempolicy",
2158		.data           = NULL,
2159		.maxlen         = sizeof(unsigned long),
2160		.mode           = 0644,
2161		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
2162	},
2163#endif
2164	 {
2165		.procname	= "hugetlb_shm_group",
2166		.data		= &sysctl_hugetlb_shm_group,
2167		.maxlen		= sizeof(gid_t),
2168		.mode		= 0644,
2169		.proc_handler	= proc_dointvec,
2170	 },
2171	{
2172		.procname	= "nr_overcommit_hugepages",
2173		.data		= NULL,
2174		.maxlen		= sizeof(unsigned long),
2175		.mode		= 0644,
2176		.proc_handler	= hugetlb_overcommit_handler,
2177	},
2178#endif
2179	{
2180		.procname	= "lowmem_reserve_ratio",
2181		.data		= &sysctl_lowmem_reserve_ratio,
2182		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
2183		.mode		= 0644,
2184		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
2185	},
2186	{
2187		.procname	= "drop_caches",
2188		.data		= &sysctl_drop_caches,
2189		.maxlen		= sizeof(int),
2190		.mode		= 0200,
2191		.proc_handler	= drop_caches_sysctl_handler,
2192		.extra1		= SYSCTL_ONE,
2193		.extra2		= SYSCTL_FOUR,
2194	},
2195#ifdef CONFIG_COMPACTION
2196	{
2197		.procname	= "compact_memory",
2198		.data		= NULL,
2199		.maxlen		= sizeof(int),
2200		.mode		= 0200,
2201		.proc_handler	= sysctl_compaction_handler,
2202	},
2203	{
2204		.procname	= "compaction_proactiveness",
2205		.data		= &sysctl_compaction_proactiveness,
2206		.maxlen		= sizeof(sysctl_compaction_proactiveness),
2207		.mode		= 0644,
2208		.proc_handler	= compaction_proactiveness_sysctl_handler,
2209		.extra1		= SYSCTL_ZERO,
2210		.extra2		= SYSCTL_ONE_HUNDRED,
2211	},
2212	{
2213		.procname	= "extfrag_threshold",
2214		.data		= &sysctl_extfrag_threshold,
2215		.maxlen		= sizeof(int),
2216		.mode		= 0644,
2217		.proc_handler	= proc_dointvec_minmax,
2218		.extra1		= SYSCTL_ZERO,
2219		.extra2		= (void *)&max_extfrag_threshold,
2220	},
2221	{
2222		.procname	= "compact_unevictable_allowed",
2223		.data		= &sysctl_compact_unevictable_allowed,
2224		.maxlen		= sizeof(int),
2225		.mode		= 0644,
2226		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
2227		.extra1		= SYSCTL_ZERO,
2228		.extra2		= SYSCTL_ONE,
2229	},
2230
2231#endif /* CONFIG_COMPACTION */
2232	{
2233		.procname	= "min_free_kbytes",
2234		.data		= &min_free_kbytes,
2235		.maxlen		= sizeof(min_free_kbytes),
2236		.mode		= 0644,
2237		.proc_handler	= min_free_kbytes_sysctl_handler,
2238		.extra1		= SYSCTL_ZERO,
2239	},
2240	{
2241		.procname	= "watermark_boost_factor",
2242		.data		= &watermark_boost_factor,
2243		.maxlen		= sizeof(watermark_boost_factor),
2244		.mode		= 0644,
2245		.proc_handler	= proc_dointvec_minmax,
2246		.extra1		= SYSCTL_ZERO,
2247	},
2248	{
2249		.procname	= "watermark_scale_factor",
2250		.data		= &watermark_scale_factor,
2251		.maxlen		= sizeof(watermark_scale_factor),
2252		.mode		= 0644,
2253		.proc_handler	= watermark_scale_factor_sysctl_handler,
2254		.extra1		= SYSCTL_ONE,
2255		.extra2		= SYSCTL_THREE_THOUSAND,
2256	},
2257	{
2258		.procname	= "percpu_pagelist_high_fraction",
2259		.data		= &percpu_pagelist_high_fraction,
2260		.maxlen		= sizeof(percpu_pagelist_high_fraction),
2261		.mode		= 0644,
2262		.proc_handler	= percpu_pagelist_high_fraction_sysctl_handler,
2263		.extra1		= SYSCTL_ZERO,
2264	},
2265	{
2266		.procname	= "page_lock_unfairness",
2267		.data		= &sysctl_page_lock_unfairness,
2268		.maxlen		= sizeof(sysctl_page_lock_unfairness),
2269		.mode		= 0644,
2270		.proc_handler	= proc_dointvec_minmax,
2271		.extra1		= SYSCTL_ZERO,
2272	},
2273#ifdef CONFIG_MMU
2274	{
2275		.procname	= "max_map_count",
2276		.data		= &sysctl_max_map_count,
2277		.maxlen		= sizeof(sysctl_max_map_count),
2278		.mode		= 0644,
2279		.proc_handler	= proc_dointvec_minmax,
2280		.extra1		= SYSCTL_ZERO,
2281	},
2282#else
2283	{
2284		.procname	= "nr_trim_pages",
2285		.data		= &sysctl_nr_trim_pages,
2286		.maxlen		= sizeof(sysctl_nr_trim_pages),
2287		.mode		= 0644,
2288		.proc_handler	= proc_dointvec_minmax,
2289		.extra1		= SYSCTL_ZERO,
2290	},
2291#endif
2292	{
2293		.procname	= "vfs_cache_pressure",
2294		.data		= &sysctl_vfs_cache_pressure,
2295		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
2296		.mode		= 0644,
2297		.proc_handler	= proc_dointvec_minmax,
2298		.extra1		= SYSCTL_ZERO,
2299	},
2300#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
2301    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
2302	{
2303		.procname	= "legacy_va_layout",
2304		.data		= &sysctl_legacy_va_layout,
2305		.maxlen		= sizeof(sysctl_legacy_va_layout),
2306		.mode		= 0644,
2307		.proc_handler	= proc_dointvec_minmax,
2308		.extra1		= SYSCTL_ZERO,
2309	},
2310#endif
2311#ifdef CONFIG_NUMA
2312	{
2313		.procname	= "zone_reclaim_mode",
2314		.data		= &node_reclaim_mode,
2315		.maxlen		= sizeof(node_reclaim_mode),
2316		.mode		= 0644,
2317		.proc_handler	= proc_dointvec_minmax,
2318		.extra1		= SYSCTL_ZERO,
2319	},
2320	{
2321		.procname	= "min_unmapped_ratio",
2322		.data		= &sysctl_min_unmapped_ratio,
2323		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
2324		.mode		= 0644,
2325		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
2326		.extra1		= SYSCTL_ZERO,
2327		.extra2		= SYSCTL_ONE_HUNDRED,
2328	},
2329	{
2330		.procname	= "min_slab_ratio",
2331		.data		= &sysctl_min_slab_ratio,
2332		.maxlen		= sizeof(sysctl_min_slab_ratio),
2333		.mode		= 0644,
2334		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
2335		.extra1		= SYSCTL_ZERO,
2336		.extra2		= SYSCTL_ONE_HUNDRED,
2337	},
2338#endif
2339#ifdef CONFIG_SMP
2340	{
2341		.procname	= "stat_interval",
2342		.data		= &sysctl_stat_interval,
2343		.maxlen		= sizeof(sysctl_stat_interval),
2344		.mode		= 0644,
2345		.proc_handler	= proc_dointvec_jiffies,
2346	},
2347	{
2348		.procname	= "stat_refresh",
2349		.data		= NULL,
2350		.maxlen		= 0,
2351		.mode		= 0600,
2352		.proc_handler	= vmstat_refresh,
2353	},
2354#endif
2355#ifdef CONFIG_MMU
2356	{
2357		.procname	= "mmap_min_addr",
2358		.data		= &dac_mmap_min_addr,
2359		.maxlen		= sizeof(unsigned long),
2360		.mode		= 0644,
2361		.proc_handler	= mmap_min_addr_handler,
2362	},
2363#endif
2364#ifdef CONFIG_NUMA
2365	{
2366		.procname	= "numa_zonelist_order",
2367		.data		= &numa_zonelist_order,
2368		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
2369		.mode		= 0644,
2370		.proc_handler	= numa_zonelist_order_handler,
2371	},
2372#endif
2373#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
2374   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
2375	{
2376		.procname	= "vdso_enabled",
2377#ifdef CONFIG_X86_32
2378		.data		= &vdso32_enabled,
2379		.maxlen		= sizeof(vdso32_enabled),
2380#else
2381		.data		= &vdso_enabled,
2382		.maxlen		= sizeof(vdso_enabled),
2383#endif
2384		.mode		= 0644,
2385		.proc_handler	= proc_dointvec,
2386		.extra1		= SYSCTL_ZERO,
2387	},
2388#endif
2389#ifdef CONFIG_MEMORY_FAILURE
2390	{
2391		.procname	= "memory_failure_early_kill",
2392		.data		= &sysctl_memory_failure_early_kill,
2393		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
2394		.mode		= 0644,
2395		.proc_handler	= proc_dointvec_minmax,
2396		.extra1		= SYSCTL_ZERO,
2397		.extra2		= SYSCTL_ONE,
2398	},
2399	{
2400		.procname	= "memory_failure_recovery",
2401		.data		= &sysctl_memory_failure_recovery,
2402		.maxlen		= sizeof(sysctl_memory_failure_recovery),
2403		.mode		= 0644,
2404		.proc_handler	= proc_dointvec_minmax,
2405		.extra1		= SYSCTL_ZERO,
2406		.extra2		= SYSCTL_ONE,
2407	},
2408#endif
2409	{
2410		.procname	= "user_reserve_kbytes",
2411		.data		= &sysctl_user_reserve_kbytes,
2412		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
2413		.mode		= 0644,
2414		.proc_handler	= proc_doulongvec_minmax,
2415	},
2416	{
2417		.procname	= "admin_reserve_kbytes",
2418		.data		= &sysctl_admin_reserve_kbytes,
2419		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
2420		.mode		= 0644,
2421		.proc_handler	= proc_doulongvec_minmax,
2422	},
2423#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
2424	{
2425		.procname	= "mmap_rnd_bits",
2426		.data		= &mmap_rnd_bits,
2427		.maxlen		= sizeof(mmap_rnd_bits),
2428		.mode		= 0600,
2429		.proc_handler	= proc_dointvec_minmax,
2430		.extra1		= (void *)&mmap_rnd_bits_min,
2431		.extra2		= (void *)&mmap_rnd_bits_max,
2432	},
2433#endif
2434#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
2435	{
2436		.procname	= "mmap_rnd_compat_bits",
2437		.data		= &mmap_rnd_compat_bits,
2438		.maxlen		= sizeof(mmap_rnd_compat_bits),
2439		.mode		= 0600,
2440		.proc_handler	= proc_dointvec_minmax,
2441		.extra1		= (void *)&mmap_rnd_compat_bits_min,
2442		.extra2		= (void *)&mmap_rnd_compat_bits_max,
2443	},
2444#endif
2445#ifdef CONFIG_USERFAULTFD
2446	{
2447		.procname	= "unprivileged_userfaultfd",
2448		.data		= &sysctl_unprivileged_userfaultfd,
2449		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
2450		.mode		= 0644,
2451		.proc_handler	= proc_dointvec_minmax,
2452		.extra1		= SYSCTL_ZERO,
2453		.extra2		= SYSCTL_ONE,
2454	},
2455#endif
2456	{ }
2457};
2458
2459static struct ctl_table debug_table[] = {
2460#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
2461	{
2462		.procname	= "exception-trace",
2463		.data		= &show_unhandled_signals,
2464		.maxlen		= sizeof(int),
2465		.mode		= 0644,
2466		.proc_handler	= proc_dointvec
2467	},
2468#endif
2469	{ }
2470};
2471
2472static struct ctl_table dev_table[] = {
2473	{ }
2474};
2475
/*
 * Pair each top-level table with the base name that sysctl_init_bases()
 * passes to register_sysctl_base().
 *
 * NOTE(review): DECLARE_SYSCTL_BASE() is defined outside this part of the
 * file; presumably it emits the per-name object that
 * register_sysctl_base() later looks up - confirm against its definition.
 */
DECLARE_SYSCTL_BASE(kernel, kern_table);
DECLARE_SYSCTL_BASE(vm, vm_table);
DECLARE_SYSCTL_BASE(debug, debug_table);
DECLARE_SYSCTL_BASE(dev, dev_table);
2480
2481int __init sysctl_init_bases(void)
2482{
2483	register_sysctl_base(kernel);
2484	register_sysctl_base(vm);
2485	register_sysctl_base(debug);
2486	register_sysctl_base(dev);
2487
2488	return 0;
2489}
2490#endif /* CONFIG_SYSCTL */
/*
 * Exports for the proc handler helpers, gathered in one place after the
 * CONFIG_SYSCTL conditional has been closed instead of following each
 * definition.  Placing an export after every symbol definition would
 * mean writing it twice, so an exception to the usual placement is
 * granted here :-)
 */

/* proc_dointvec* family */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);

/* proc_douintvec* family; the _minmax variant is the only GPL-only export */
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);

/* proc_doulongvec* family */
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* remaining handlers */
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_do_large_bitmap);
2507