Deleted Added
sdiff udiff text old ( 254112 ) new ( 254591 )
full compact
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 38 unchanged lines hidden (view full) ---

47
48SYSCTL_DECL(_vfs_zfs);
49SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
50
51/*
52 * Virtual device management.
53 */
54
55/**
56 * The limit for ZFS to automatically increase a top-level vdev's ashift
57 * from logical ashift to physical ashift.
58 *
59 * Example: one or more 512B emulation child vdevs
60 * child->vdev_ashift = 9 (512 bytes)
61 * child->vdev_physical_ashift = 12 (4096 bytes)
62 * zfs_max_auto_ashift = 11 (2048 bytes)
63 *
64 * On pool creation or the addition of a new top-level vdev, ZFS will
65 * bump the ashift of the top-level vdev to 2048.
66 *
67 * Example: one or more 512B emulation child vdevs
68 * child->vdev_ashift = 9 (512 bytes)
69 * child->vdev_physical_ashift = 12 (4096 bytes)
70 * zfs_max_auto_ashift = 13 (8192 bytes)
71 *
72 * On pool creation or the addition of a new top-level vdev, ZFS will
73 * bump the ashift of the top-level vdev to 4096.
74 */
75static uint64_t zfs_max_auto_ashift = SPA_MAXASHIFT;
76
77static int
78sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS)
79{
80 uint64_t val;
81 int err;
82
83 val = zfs_max_auto_ashift;
84 err = sysctl_handle_64(oidp, &val, 0, req);
85 if (err != 0 || req->newptr == NULL)
86 return (err);
87
88 if (val > SPA_MAXASHIFT)
89 val = SPA_MAXASHIFT;
90
91 zfs_max_auto_ashift = val;
92
93 return (0);
94}
95SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
96 CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t),
97 sysctl_vfs_zfs_max_auto_ashift, "QU",
98 "Cap on logical -> physical ashift adjustment on new top-level vdevs.");
99
100static vdev_ops_t *vdev_ops_table[] = {
101 &vdev_root_ops,
102 &vdev_raidz_ops,
103 &vdev_mirror_ops,
104 &vdev_replacing_ops,
105 &vdev_spare_ops,
106#ifdef _KERNEL
107 &vdev_geom_ops,

--- 678 unchanged lines hidden (view full) ---

786 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
787
788 mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops);
789
790 mvd->vdev_asize = cvd->vdev_asize;
791 mvd->vdev_min_asize = cvd->vdev_min_asize;
792 mvd->vdev_max_asize = cvd->vdev_max_asize;
793 mvd->vdev_ashift = cvd->vdev_ashift;
794 mvd->vdev_logical_ashift = cvd->vdev_logical_ashift;
795 mvd->vdev_physical_ashift = cvd->vdev_physical_ashift;
796 mvd->vdev_state = cvd->vdev_state;
797 mvd->vdev_crtxg = cvd->vdev_crtxg;
798
799 vdev_remove_child(pvd, cvd);
800 vdev_add_child(pvd, mvd);
801 cvd->vdev_id = mvd->vdev_children;
802 vdev_add_child(mvd, cvd);
803 vdev_top_update(cvd->vdev_top, cvd->vdev_top);

--- 15 unchanged lines hidden (view full) ---

819
820 ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL);
821
822 ASSERT(mvd->vdev_children == 1);
823 ASSERT(mvd->vdev_ops == &vdev_mirror_ops ||
824 mvd->vdev_ops == &vdev_replacing_ops ||
825 mvd->vdev_ops == &vdev_spare_ops);
826 cvd->vdev_ashift = mvd->vdev_ashift;
827 cvd->vdev_logical_ashift = mvd->vdev_logical_ashift;
828 cvd->vdev_physical_ashift = mvd->vdev_physical_ashift;
829
830 vdev_remove_child(mvd, cvd);
831 vdev_remove_child(pvd, mvd);
832
833 /*
834 * If cvd will replace mvd as a top-level vdev, preserve mvd's guid.
835 * Otherwise, we could have detached an offline device, and when we
836 * go to import the pool we'll think we have two top-level vdevs,

--- 327 unchanged lines hidden (view full) ---

1164int
1165vdev_open(vdev_t *vd)
1166{
1167 spa_t *spa = vd->vdev_spa;
1168 int error;
1169 uint64_t osize = 0;
1170 uint64_t max_osize = 0;
1171 uint64_t asize, max_asize, psize;
1172 uint64_t logical_ashift = 0;
1173 uint64_t physical_ashift = 0;
1174
1175 ASSERT(vd->vdev_open_thread == curthread ||
1176 spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
1177 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED ||
1178 vd->vdev_state == VDEV_STATE_CANT_OPEN ||
1179 vd->vdev_state == VDEV_STATE_OFFLINE);
1180
1181 vd->vdev_stat.vs_aux = VDEV_AUX_NONE;

--- 13 unchanged lines hidden (view full) ---

1195 vd->vdev_label_aux);
1196 return (SET_ERROR(ENXIO));
1197 } else if (vd->vdev_offline) {
1198 ASSERT(vd->vdev_children == 0);
1199 vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE);
1200 return (SET_ERROR(ENXIO));
1201 }
1202
1203 error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize,
1204 &logical_ashift, &physical_ashift);
1205
1206 /*
1207 * Reset the vdev_reopening flag so that we actually close
1208 * the vdev on error.
1209 */
1210 vd->vdev_reopening = B_FALSE;
1211 if (zio_injection_enabled && error == 0)
1212 error = zio_handle_device_injection(vd, NULL, ENXIO);

--- 81 unchanged lines hidden (view full) ---

1294 * Make sure the allocatable size hasn't shrunk.
1295 */
1296 if (asize < vd->vdev_min_asize) {
1297 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1298 VDEV_AUX_BAD_LABEL);
1299 return (SET_ERROR(EINVAL));
1300 }
1301
1302 vd->vdev_physical_ashift =
1303 MAX(physical_ashift, vd->vdev_physical_ashift);
1304 vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift);
1305 vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift);
1306
1307 if (vd->vdev_logical_ashift > SPA_MAXASHIFT) {
1308 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1309 VDEV_AUX_ASHIFT_TOO_BIG);
1310 return (EINVAL);
1311 }
1312
1313 if (vd->vdev_asize == 0) {
1314 /*
1315 * This is the first-ever open, so use the computed values.
1316 * For testing purposes, a higher ashift can be requested.
1317 */
1318 vd->vdev_asize = asize;
1319 vd->vdev_max_asize = max_asize;
1320 } else {
1321 /*
1322 * Make sure the alignment requirement hasn't increased.
1323 */
1324 if (vd->vdev_ashift > vd->vdev_top->vdev_ashift &&
1325 vd->vdev_ops->vdev_op_leaf) {
1326 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
1327 VDEV_AUX_BAD_LABEL);
1328 return (EINVAL);
1329 }
1330 vd->vdev_max_asize = max_asize;
1331 }
1332
1333 /*
1334 * If all children are healthy and the asize has increased,
1335 * then we've experienced dynamic LUN growth. If automatic
1336 * expansion is enabled then use the additional space.

--- 293 unchanged lines hidden (view full) ---

1630{
1631 /*
1632 * Aim for roughly 200 metaslabs per vdev.
1633 */
1634 vd->vdev_ms_shift = highbit(vd->vdev_asize / 200);
1635 vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
1636}
1637
1638/*
1639 * Maximize performance by inflating the configured ashift for
1640 * top level vdevs to be as close to the physical ashift as
1641 * possible without exceeding the administrator specified
1642 * limit.
1643 */
1644void
1645vdev_ashift_optimize(vdev_t *vd)
1646{
1647 if (vd == vd->vdev_top &&
1648 (vd->vdev_ashift < vd->vdev_physical_ashift) &&
1649 (vd->vdev_ashift < zfs_max_auto_ashift)) {
1650 vd->vdev_ashift = MIN(zfs_max_auto_ashift,
1651 vd->vdev_physical_ashift);
1652 }
1653}
1654
1655void
1656vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg)
1657{
1658 ASSERT(vd == vd->vdev_top);
1659 ASSERT(!vd->vdev_ishole);
1660 ASSERT(ISP2(flags));
1661 ASSERT(spa_writeable(vd->vdev_spa));
1662
1663 if (flags & VDD_METASLAB)

--- 1001 unchanged lines hidden (view full) ---

2665 mutex_enter(&vd->vdev_stat_lock);
2666 bcopy(&vd->vdev_stat, vs, sizeof (*vs));
2667 vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
2668 vs->vs_state = vd->vdev_state;
2669 vs->vs_rsize = vdev_get_min_asize(vd);
2670 if (vd->vdev_ops->vdev_op_leaf)
2671 vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
2672 vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
2673 vs->vs_configured_ashift = vd->vdev_top != NULL
2674 ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
2675 vs->vs_logical_ashift = vd->vdev_logical_ashift;
2676 vs->vs_physical_ashift = vd->vdev_physical_ashift;
2677 mutex_exit(&vd->vdev_stat_lock);
2678
2679 /*
2680 * If we're getting stats on the root vdev, aggregate the I/O counts
2681 * over all top-level vdevs (i.e. the direct children of the root).
2682 */
2683 if (vd == rvd) {
2684 for (int c = 0; c < rvd->vdev_children; c++) {

--- 714 unchanged lines hidden ---