1/*
2 * Copyright (c) 2020 iXsystems, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/types.h>
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/malloc.h>
38#include <sys/mutex.h>
39#include <sys/proc.h>
40#include <sys/errno.h>
41#include <sys/uio.h>
42#include <sys/buf.h>
43#include <sys/file.h>
44#include <sys/kmem.h>
45#include <sys/conf.h>
46#include <sys/cmn_err.h>
47#include <sys/stat.h>
48#include <sys/zfs_ioctl.h>
49#include <sys/zfs_vfsops.h>
50#include <sys/zfs_znode.h>
51#include <sys/zap.h>
52#include <sys/spa.h>
53#include <sys/spa_impl.h>
54#include <sys/vdev.h>
55#include <sys/vdev_impl.h>
56#include <sys/dmu.h>
57#include <sys/dsl_dir.h>
58#include <sys/dsl_dataset.h>
59#include <sys/dsl_prop.h>
60#include <sys/dsl_deleg.h>
61#include <sys/dmu_objset.h>
62#include <sys/dmu_impl.h>
63#include <sys/dmu_tx.h>
64#include <sys/sunddi.h>
65#include <sys/policy.h>
66#include <sys/zone.h>
67#include <sys/nvpair.h>
68#include <sys/mount.h>
69#include <sys/taskqueue.h>
70#include <sys/sdt.h>
71#include <sys/fs/zfs.h>
72#include <sys/zfs_ctldir.h>
73#include <sys/zfs_dir.h>
74#include <sys/zfs_onexit.h>
75#include <sys/zvol.h>
76#include <sys/dsl_scan.h>
77#include <sys/dmu_objset.h>
78#include <sys/dmu_send.h>
79#include <sys/dsl_destroy.h>
80#include <sys/dsl_bookmark.h>
81#include <sys/dsl_userhold.h>
82#include <sys/zfeature.h>
83#include <sys/zcp.h>
84#include <sys/zio_checksum.h>
85#include <sys/vdev_removal.h>
86#include <sys/dsl_crypt.h>
87
88#include <sys/zfs_ioctl_compat.h>
89#include <sys/zfs_context.h>
90
91#include <sys/arc_impl.h>
92#include <sys/dsl_pool.h>
93
94
95/* BEGIN CSTYLED */
96SYSCTL_DECL(_vfs_zfs);
97SYSCTL_NODE(_vfs_zfs, OID_AUTO, arc, CTLFLAG_RW, 0, "ZFS adaptive replacement cache");
98SYSCTL_NODE(_vfs_zfs, OID_AUTO, condense, CTLFLAG_RW, 0, "ZFS condense");
99SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf, CTLFLAG_RW, 0, "ZFS disk buf cache");
100SYSCTL_NODE(_vfs_zfs, OID_AUTO, dbuf_cache, CTLFLAG_RW, 0, "ZFS disk buf cache");
101SYSCTL_NODE(_vfs_zfs, OID_AUTO, deadman, CTLFLAG_RW, 0, "ZFS deadman");
102SYSCTL_NODE(_vfs_zfs, OID_AUTO, dedup, CTLFLAG_RW, 0, "ZFS dedup");
103SYSCTL_NODE(_vfs_zfs, OID_AUTO, l2arc, CTLFLAG_RW, 0, "ZFS l2arc");
104SYSCTL_NODE(_vfs_zfs, OID_AUTO, livelist, CTLFLAG_RW, 0, "ZFS livelist");
105SYSCTL_NODE(_vfs_zfs, OID_AUTO, lua, CTLFLAG_RW, 0, "ZFS lua");
106SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab, CTLFLAG_RW, 0, "ZFS metaslab");
107SYSCTL_NODE(_vfs_zfs, OID_AUTO, mg, CTLFLAG_RW, 0, "ZFS metaslab group");
108SYSCTL_NODE(_vfs_zfs, OID_AUTO, multihost, CTLFLAG_RW, 0, "ZFS multihost protection");
109SYSCTL_NODE(_vfs_zfs, OID_AUTO, prefetch, CTLFLAG_RW, 0, "ZFS prefetch");
110SYSCTL_NODE(_vfs_zfs, OID_AUTO, reconstruct, CTLFLAG_RW, 0, "ZFS reconstruct");
111SYSCTL_NODE(_vfs_zfs, OID_AUTO, recv, CTLFLAG_RW, 0, "ZFS receive");
112SYSCTL_NODE(_vfs_zfs, OID_AUTO, send, CTLFLAG_RW, 0, "ZFS send");
113SYSCTL_NODE(_vfs_zfs, OID_AUTO, spa, CTLFLAG_RW, 0, "ZFS space allocation");
114SYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RW, 0, "ZFS TRIM");
115SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, "ZFS transaction group");
116SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
117SYSCTL_NODE(_vfs_zfs, OID_AUTO, vnops, CTLFLAG_RW, 0, "ZFS VNOPS");
118SYSCTL_NODE(_vfs_zfs, OID_AUTO, zevent, CTLFLAG_RW, 0, "ZFS event");
119SYSCTL_NODE(_vfs_zfs, OID_AUTO, zil, CTLFLAG_RW, 0, "ZFS ZIL");
120SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
121
122SYSCTL_NODE(_vfs_zfs_livelist, OID_AUTO, condense, CTLFLAG_RW, 0,
123    "ZFS livelist condense");
124SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, cache, CTLFLAG_RW, 0, "ZFS VDEV Cache");
125SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, file, CTLFLAG_RW, 0, "ZFS VDEV file");
126SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
127    "ZFS VDEV mirror");
128
129SYSCTL_DECL(_vfs_zfs_version);
130SYSCTL_CONST_STRING(_vfs_zfs_version, OID_AUTO, module, CTLFLAG_RD,
131    (ZFS_META_VERSION "-" ZFS_META_RELEASE), "OpenZFS module version");
132
133extern arc_state_t ARC_anon;
134extern arc_state_t ARC_mru;
135extern arc_state_t ARC_mru_ghost;
136extern arc_state_t ARC_mfu;
137extern arc_state_t ARC_mfu_ghost;
138extern arc_state_t ARC_l2c_only;
139
140/*
141 * minimum lifespan of a prefetch block in clock ticks
142 * (initialized in arc_init())
143 */
144
145/* arc.c */
146
147/* legacy compat */
148extern uint64_t l2arc_write_max;	/* def max write size */
149extern uint64_t l2arc_write_boost;	/* extra warmup write */
150extern uint64_t l2arc_headroom;		/* # of dev writes */
151extern uint64_t l2arc_headroom_boost;
152extern uint64_t l2arc_feed_secs;	/* interval seconds */
153extern uint64_t l2arc_feed_min_ms;	/* min interval msecs */
154extern int l2arc_noprefetch;			/* don't cache prefetch bufs */
155extern int l2arc_feed_again;			/* turbo warmup */
156extern int l2arc_norw;			/* no reads during writes */
157
158SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, CTLFLAG_RW,
159    &l2arc_write_max, 0, "max write size (LEGACY)");
160SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, CTLFLAG_RW,
161    &l2arc_write_boost, 0, "extra write during warmup (LEGACY)");
162SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, CTLFLAG_RW,
163    &l2arc_headroom, 0, "number of dev writes (LEGACY)");
164SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, CTLFLAG_RW,
165    &l2arc_feed_secs, 0, "interval seconds (LEGACY)");
166SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, CTLFLAG_RW,
167    &l2arc_feed_min_ms, 0, "min interval milliseconds (LEGACY)");
168
169SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, CTLFLAG_RW,
170    &l2arc_noprefetch, 0, "don't cache prefetch bufs (LEGACY)");
171SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, CTLFLAG_RW,
172    &l2arc_feed_again, 0, "turbo warmup (LEGACY)");
173SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, CTLFLAG_RW,
174    &l2arc_norw, 0, "no reads during writes (LEGACY)");
175#if 0
176extern int zfs_compressed_arc_enabled;
177SYSCTL_INT(_vfs_zfs, OID_AUTO, compressed_arc_enabled, CTLFLAG_RW,
178    &zfs_compressed_arc_enabled, 1, "compressed arc buffers (LEGACY)");
179#endif
180
181SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_size, CTLFLAG_RD,
182    &ARC_anon.arcs_size.rc_count, 0, "size of anonymous state");
183SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD,
184    &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
185    "size of anonymous state");
186SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD,
187    &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
188    "size of anonymous state");
189
190SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_size, CTLFLAG_RD,
191    &ARC_mru.arcs_size.rc_count, 0, "size of mru state");
192SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD,
193    &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
194    "size of metadata in mru state");
195SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD,
196    &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
197    "size of data in mru state");
198
199SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_size, CTLFLAG_RD,
200    &ARC_mru_ghost.arcs_size.rc_count, 0, "size of mru ghost state");
201SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD,
202    &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
203    "size of metadata in mru ghost state");
204SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD,
205    &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
206    "size of data in mru ghost state");
207
208SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_size, CTLFLAG_RD,
209    &ARC_mfu.arcs_size.rc_count, 0, "size of mfu state");
210SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD,
211    &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
212    "size of metadata in mfu state");
213SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD,
214    &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
215    "size of data in mfu state");
216
217SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_size, CTLFLAG_RD,
218    &ARC_mfu_ghost.arcs_size.rc_count, 0, "size of mfu ghost state");
219SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD,
220    &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0,
221    "size of metadata in mfu ghost state");
222SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD,
223    &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0,
224    "size of data in mfu ghost state");
225
226SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2c_only_size, CTLFLAG_RD,
227    &ARC_l2c_only.arcs_size.rc_count, 0, "size of mru state");
228
229static int
230sysctl_vfs_zfs_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
231{
232	int err, val;
233
234	val = arc_no_grow_shift;
235	err = sysctl_handle_int(oidp, &val, 0, req);
236	if (err != 0 || req->newptr == NULL)
237		return (err);
238
239        if (val < 0 || val >= arc_shrink_shift)
240		return (EINVAL);
241
242	arc_no_grow_shift = val;
243	return (0);
244}
245
246SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
247    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, sizeof (int),
248    sysctl_vfs_zfs_arc_no_grow_shift, "I",
249    "log2(fraction of ARC which must be free to allow growing)");
250
251int
252param_set_arc_long(SYSCTL_HANDLER_ARGS)
253{
254	int err;
255
256	err = sysctl_handle_long(oidp, arg1, 0, req);
257	if (err != 0 || req->newptr == NULL)
258		return (err);
259
260	arc_tuning_update(B_TRUE);
261
262	return (0);
263}
264
265int
266param_set_arc_int(SYSCTL_HANDLER_ARGS)
267{
268	int err;
269
270	err = sysctl_handle_int(oidp, arg1, 0, req);
271	if (err != 0 || req->newptr == NULL)
272		return (err);
273
274	arc_tuning_update(B_TRUE);
275
276	return (0);
277}
278
279SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
280    CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
281    &zfs_arc_min, sizeof (zfs_arc_min), param_set_arc_long, "LU",
282    "min arc size (LEGACY)");
283SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
284    CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
285    &zfs_arc_max, sizeof (zfs_arc_max), param_set_arc_long, "LU",
286    "max arc size (LEGACY)");
287
288/* dbuf.c */
289
290
291/* dmu.c */
292
293/* dmu_zfetch.c */
294SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)");
295
296/* max bytes to prefetch per stream (default 8MB) */
297extern uint32_t	zfetch_max_distance;
298SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, CTLFLAG_RWTUN,
299    &zfetch_max_distance, 0, "Max bytes to prefetch per stream (LEGACY)");
300
301/* max bytes to prefetch indirects for per stream (default 64MB) */
302extern uint32_t	zfetch_max_idistance;
303SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, CTLFLAG_RWTUN,
304    &zfetch_max_idistance, 0,
305    "Max bytes to prefetch indirects for per stream (LEGACY)");
306
307/* dsl_pool.c */
308
309/* dnode.c */
310extern int zfs_default_bs;
311SYSCTL_INT(_vfs_zfs, OID_AUTO, default_bs, CTLFLAG_RWTUN,
312    &zfs_default_bs, 0, "Default dnode block shift");
313
314extern int zfs_default_ibs;
315SYSCTL_INT(_vfs_zfs, OID_AUTO, default_ibs, CTLFLAG_RWTUN,
316    &zfs_default_ibs, 0, "Default dnode indirect block shift");
317
318
319/* dsl_scan.c */
320
321/* metaslab.c */
322
323/*
324 * In pools where the log space map feature is not enabled we touch
325 * multiple metaslabs (and their respective space maps) with each
326 * transaction group. Thus, we benefit from having a small space map
327 * block size since it allows us to issue more I/O operations scattered
328 * around the disk. So a sane default for the space map block size
329 * is 8~16K.
330 */
331extern int zfs_metaslab_sm_blksz_no_log;
332SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_no_log, CTLFLAG_RDTUN,
333    &zfs_metaslab_sm_blksz_no_log, 0,
334    "Block size for space map in pools with log space map disabled.  "
335    "Power of 2 and greater than 4096.");
336
337/*
338 * When the log space map feature is enabled, we accumulate a lot of
339 * changes per metaslab that are flushed once in a while so we benefit
340 * from a bigger block size like 128K for the metaslab space maps.
341 */
342extern int zfs_metaslab_sm_blksz_with_log;
343SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, sm_blksz_with_log, CTLFLAG_RDTUN,
344    &zfs_metaslab_sm_blksz_with_log, 0,
345    "Block size for space map in pools with log space map enabled.  "
346    "Power of 2 and greater than 4096.");
347
348/*
349 * The in-core space map representation is more compact than its on-disk form.
350 * The zfs_condense_pct determines how much more compact the in-core
351 * space map representation must be before we compact it on-disk.
352 * Values should be greater than or equal to 100.
353 */
354extern int zfs_condense_pct;
355SYSCTL_INT(_vfs_zfs, OID_AUTO, condense_pct, CTLFLAG_RWTUN,
356    &zfs_condense_pct, 0,
357    "Condense on-disk spacemap when it is more than this many percents"
358    " of in-memory counterpart");
359
360extern int zfs_remove_max_segment;
361SYSCTL_INT(_vfs_zfs, OID_AUTO, remove_max_segment, CTLFLAG_RWTUN,
362    &zfs_remove_max_segment, 0, "Largest contiguous segment ZFS will attempt to"
363    " allocate when removing a device");
364
365extern int zfs_removal_suspend_progress;
366SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, CTLFLAG_RWTUN,
367    &zfs_removal_suspend_progress, 0, "Ensures certain actions can happen while"
368    " in the middle of a removal");
369
370
371/*
372 * Minimum size which forces the dynamic allocator to change
373 * it's allocation strategy.  Once the space map cannot satisfy
374 * an allocation of this size then it switches to using more
375 * aggressive strategy (i.e search by size rather than offset).
376 */
377extern uint64_t metaslab_df_alloc_threshold;
378SYSCTL_QUAD(_vfs_zfs_metaslab, OID_AUTO, df_alloc_threshold, CTLFLAG_RWTUN,
379    &metaslab_df_alloc_threshold, 0,
380    "Minimum size which forces the dynamic allocator to change it's allocation strategy");
381
382/*
383 * The minimum free space, in percent, which must be available
384 * in a space map to continue allocations in a first-fit fashion.
385 * Once the space map's free space drops below this level we dynamically
386 * switch to using best-fit allocations.
387 */
388extern int metaslab_df_free_pct;
389SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct, CTLFLAG_RWTUN,
390    &metaslab_df_free_pct, 0,
391    "The minimum free space, in percent, which must be available in a "
392    "space map to continue allocations in a first-fit fashion");
393
394/*
395 * Percentage of all cpus that can be used by the metaslab taskq.
396 */
397extern int metaslab_load_pct;
398SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct, CTLFLAG_RWTUN,
399    &metaslab_load_pct, 0,
400    "Percentage of cpus that can be used by the metaslab taskq");
401
402/*
403 * Max number of metaslabs per group to preload.
404 */
405extern int metaslab_preload_limit;
406SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
407    &metaslab_preload_limit, 0,
408    "Max number of metaslabs per group to preload");
409
410/* spa.c */
411extern int zfs_ccw_retry_interval;
412SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
413    &zfs_ccw_retry_interval, 0,
414    "Configuration cache file write, retry after failure, interval (seconds)");
415
416extern uint64_t zfs_max_missing_tvds_cachefile;
417SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_cachefile, CTLFLAG_RWTUN,
418    &zfs_max_missing_tvds_cachefile, 0,
419    "allow importing pools with missing top-level vdevs in cache file");
420
421extern uint64_t zfs_max_missing_tvds_scan;
422SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, max_missing_tvds_scan, CTLFLAG_RWTUN,
423    &zfs_max_missing_tvds_scan, 0,
424    "allow importing pools with missing top-level vdevs during scan");
425
426/* spa_misc.c */
427extern int zfs_flags;
428static int
429sysctl_vfs_zfs_debug_flags(SYSCTL_HANDLER_ARGS)
430{
431	int err, val;
432
433	val = zfs_flags;
434	err = sysctl_handle_int(oidp, &val, 0, req);
435	if (err != 0 || req->newptr == NULL)
436		return (err);
437
438	/*
439	 * ZFS_DEBUG_MODIFY must be enabled prior to boot so all
440	 * arc buffers in the system have the necessary additional
441	 * checksum data.  However, it is safe to disable at any
442	 * time.
443	 */
444	if (!(zfs_flags & ZFS_DEBUG_MODIFY))
445		val &= ~ZFS_DEBUG_MODIFY;
446	zfs_flags = val;
447
448	return (0);
449}
450
451SYSCTL_PROC(_vfs_zfs, OID_AUTO, debugflags,
452    CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, NULL, 0,
453    sysctl_vfs_zfs_debug_flags, "IU", "Debug flags for ZFS testing.");
454
455int
456param_set_deadman_synctime(SYSCTL_HANDLER_ARGS)
457{
458	unsigned long val;
459	int err;
460
461	val = zfs_deadman_synctime_ms;
462	err = sysctl_handle_long(oidp, &val, 0, req);
463	if (err != 0 || req->newptr == NULL)
464		return (err);
465	zfs_deadman_synctime_ms = val;
466
467	spa_set_deadman_synctime(MSEC2NSEC(zfs_deadman_synctime_ms));
468
469	return (0);
470}
471
472int
473param_set_deadman_ziotime(SYSCTL_HANDLER_ARGS)
474{
475	unsigned long val;
476	int err;
477
478	val = zfs_deadman_ziotime_ms;
479	err = sysctl_handle_long(oidp, &val, 0, req);
480	if (err != 0 || req->newptr == NULL)
481		return (err);
482	zfs_deadman_ziotime_ms = val;
483
484	spa_set_deadman_ziotime(MSEC2NSEC(zfs_deadman_synctime_ms));
485
486	return (0);
487}
488
/*
 * Handler for vfs.zfs.deadman.failmode ("wait", "continue" or "panic").
 *
 * On a read, the current mode string is copied out.  On a write, the
 * string is passed to param_set_deadman_failmode_common(), whose return
 * value is negated here -- presumably the common helper follows the
 * negative-errno convention; verify against its definition.
 */
int
param_set_deadman_failmode(SYSCTL_HANDLER_ARGS)
{
	char buf[16];
	int rc;

	if (req->newptr == NULL)
		strlcpy(buf, zfs_deadman_failmode, sizeof (buf));

	rc = sysctl_handle_string(oidp, buf, sizeof (buf), req);
	if (rc || req->newptr == NULL)
		return (rc);
	/* Writing back the current mode is a no-op. */
	if (strcmp(buf, zfs_deadman_failmode) == 0)
		return (0);
	/*
	 * NOTE(review): these direct assignments look redundant with the
	 * call to param_set_deadman_failmode_common() below, which takes
	 * the same string -- confirm before simplifying.
	 */
	if (!strcmp(buf,  "wait"))
		zfs_deadman_failmode = "wait";
	if (!strcmp(buf,  "continue"))
		zfs_deadman_failmode = "continue";
	if (!strcmp(buf,  "panic"))
		zfs_deadman_failmode = "panic";

	return (-param_set_deadman_failmode_common(buf));
}
512
513
514/* spacemap.c */
515extern int space_map_ibs;
516SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_ibs, CTLFLAG_RWTUN,
517    &space_map_ibs, 0, "Space map indirect block shift");
518
519
520/* vdev.c */
521int
522param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
523{
524	uint64_t val;
525	int err;
526
527	val = zfs_vdev_min_auto_ashift;
528	err = sysctl_handle_64(oidp, &val, 0, req);
529	if (err != 0 || req->newptr == NULL)
530		return (SET_ERROR(err));
531
532	if (val < ASHIFT_MIN || val > zfs_vdev_max_auto_ashift)
533		return (SET_ERROR(EINVAL));
534
535	zfs_vdev_min_auto_ashift = val;
536
537	return (0);
538}
539
540int
541param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
542{
543	uint64_t val;
544	int err;
545
546	val = zfs_vdev_max_auto_ashift;
547	err = sysctl_handle_64(oidp, &val, 0, req);
548	if (err != 0 || req->newptr == NULL)
549		return (SET_ERROR(err));
550
551	if (val > ASHIFT_MAX || val < zfs_vdev_min_auto_ashift)
552		return (SET_ERROR(EINVAL));
553
554	zfs_vdev_max_auto_ashift = val;
555
556	return (0);
557}
558
559SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
560    CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
561    &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
562    param_set_min_auto_ashift, "QU",
563    "Min ashift used when creating new top-level vdev. (LEGACY)");
564SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
565    CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
566    &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
567    param_set_max_auto_ashift, "QU",
568    "Max ashift used when optimizing for logical -> physical sector size on "
569    "new top-level vdevs. (LEGACY)");
570
571/*
572 * Since the DTL space map of a vdev is not expected to have a lot of
573 * entries, we default its block size to 4K.
574 */
575extern int zfs_vdev_dtl_sm_blksz;
576SYSCTL_INT(_vfs_zfs, OID_AUTO, dtl_sm_blksz, CTLFLAG_RDTUN,
577    &zfs_vdev_dtl_sm_blksz, 0,
578    "Block size for DTL space map.  Power of 2 and greater than 4096.");
579
580/*
581 * vdev-wide space maps that have lots of entries written to them at
582 * the end of each transaction can benefit from a higher I/O bandwidth
583 * (e.g. vdev_obsolete_sm), thus we default their block size to 128K.
584 */
585extern int zfs_vdev_standard_sm_blksz;
586SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN,
587    &zfs_vdev_standard_sm_blksz, 0,
588    "Block size for standard space map.  Power of 2 and greater than 4096.");
589
590extern int vdev_validate_skip;
591SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, CTLFLAG_RDTUN,
592    &vdev_validate_skip, 0,
593    "Enable to bypass vdev_validate().");
594
595
596/* vdev_cache.c */
597
598/* vdev_mirror.c */
599/*
600 * The load configuration settings below are tuned by default for
601 * the case where all devices are of the same rotational type.
602 *
603 * If there is a mixture of rotating and non-rotating media, setting
604 * non_rotating_seek_inc to 0 may well provide better results as it
605 * will direct more reads to the non-rotating vdevs which are more
606 * likely to have a higher performance.
607 */
608
609
610/* vdev_queue.c */
611#define	ZFS_VDEV_QUEUE_KNOB_MIN(name)					\
612extern uint32_t zfs_vdev_ ## name ## _min_active;				\
613SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\
614    &zfs_vdev_ ## name ## _min_active, 0,				\
615    "Initial number of I/O requests of type " #name			\
616    " active for each device");
617
618#define	ZFS_VDEV_QUEUE_KNOB_MAX(name)					\
619extern uint32_t zfs_vdev_ ## name ## _max_active;				\
620SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN, \
621    &zfs_vdev_ ## name ## _max_active, 0,				\
622    "Maximum number of I/O requests of type " #name			\
623    " active for each device");
624
625
626#undef ZFS_VDEV_QUEUE_KNOB
627
628extern uint32_t zfs_vdev_max_active;
629SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, CTLFLAG_RWTUN,
630    &zfs_vdev_max_active, 0,
631    "The maximum number of I/Os of all types active for each device. (LEGACY)");
632
633extern int zfs_vdev_def_queue_depth;
634SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN,
635    &zfs_vdev_def_queue_depth, 0,
636    "Default queue depth for each allocator");
637
/*extern uint64_t zfs_multihost_history;
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, multihost_history, CTLFLAG_RWTUN,
    &zfs_multihost_history, 0,
    "Historical statistics for the last N multihost updates");*/
642
643#ifdef notyet
644SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, trim_on_init, CTLFLAG_RW,
645    &vdev_trim_on_init, 0, "Enable/disable full vdev trim on initialisation");
646#endif
647
648
649/* zio.c */
650#if defined(__LP64__)
651int zio_use_uma = 1;
652#else
653int zio_use_uma = 0;
654#endif
655
656SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, use_uma, CTLFLAG_RDTUN, &zio_use_uma, 0,
657    "Use uma(9) for ZIO allocations");
658SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, CTLFLAG_RDTUN, &zio_exclude_metadata, 0,
659    "Exclude metadata buffers from dumps as well");
660
661int
662param_set_slop_shift(SYSCTL_HANDLER_ARGS)
663{
664	int val;
665	int err;
666
667	val = *(int *)arg1;
668
669	err = sysctl_handle_int(oidp, &val, 0, req);
670	if (err != 0 || req->newptr == NULL)
671		return (err);
672
673	if (val < 1 || val > 31)
674		return (EINVAL);
675
676	*(int *)arg1 = val;
677
678	return (0);
679}
680
681int
682param_set_multihost_interval(SYSCTL_HANDLER_ARGS)
683{
684	int err;
685
686	err = sysctl_handle_long(oidp, arg1, 0, req);
687	if (err != 0 || req->newptr == NULL)
688		return (err);
689
690	if (spa_mode_global != SPA_MODE_UNINIT)
691		mmp_signal_all_threads();
692
693	return (0);
694}
695