vdev.c (254112) | vdev.c (254591) |
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 38 unchanged lines hidden (view full) --- 47 48SYSCTL_DECL(_vfs_zfs); 49SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV"); 50 51/* 52 * Virtual device management. 53 */ 54 | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 38 unchanged lines hidden (view full) --- 47 48SYSCTL_DECL(_vfs_zfs); 49SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV"); 50 51/* 52 * Virtual device management. 53 */ 54 |
55/** 56 * The limit for ZFS to automatically increase a top-level vdev's ashift 57 * from logical ashift to physical ashift. 58 * 59 * Example: one or more 512B emulation child vdevs 60 * child->vdev_ashift = 9 (512 bytes) 61 * child->vdev_physical_ashift = 12 (4096 bytes) 62 * zfs_max_auto_ashift = 11 (2048 bytes) 63 * 64 * On pool creation or the addition of a new top-level vdev, ZFS will 65 * bump the ashift of the top-level vdev to 2048. 66 * 67 * Example: one or more 512B emulation child vdevs 68 * child->vdev_ashift = 9 (512 bytes) 69 * child->vdev_physical_ashift = 12 (4096 bytes) 70 * zfs_max_auto_ashift = 13 (8192 bytes) 71 * 72 * On pool creation or the addition of a new top-level vdev, ZFS will 73 * bump the ashift of the top-level vdev to 4096. 74 */ 75static uint64_t zfs_max_auto_ashift = SPA_MAXASHIFT; 76 77static int 78sysctl_vfs_zfs_max_auto_ashift(SYSCTL_HANDLER_ARGS) 79{ 80 uint64_t val; 81 int err; 82 83 val = zfs_max_auto_ashift; 84 err = sysctl_handle_64(oidp, &val, 0, req); 85 if (err != 0 || req->newptr == NULL) 86 return (err); 87 88 if (val > SPA_MAXASHIFT) 89 val = SPA_MAXASHIFT; 90 91 zfs_max_auto_ashift = val; 92 93 return (0); 94} 95SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, 96 CTLTYPE_U64 | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(uint64_t), 97 sysctl_vfs_zfs_max_auto_ashift, "QU", 98 "Cap on logical -> physical ashift adjustment on new top-level vdevs."); 99 |
|
55static vdev_ops_t *vdev_ops_table[] = { 56 &vdev_root_ops, 57 &vdev_raidz_ops, 58 &vdev_mirror_ops, 59 &vdev_replacing_ops, 60 &vdev_spare_ops, 61#ifdef _KERNEL 62 &vdev_geom_ops, --- 678 unchanged lines hidden (view full) --- 741 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 742 743 mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops); 744 745 mvd->vdev_asize = cvd->vdev_asize; 746 mvd->vdev_min_asize = cvd->vdev_min_asize; 747 mvd->vdev_max_asize = cvd->vdev_max_asize; 748 mvd->vdev_ashift = cvd->vdev_ashift; | 100static vdev_ops_t *vdev_ops_table[] = { 101 &vdev_root_ops, 102 &vdev_raidz_ops, 103 &vdev_mirror_ops, 104 &vdev_replacing_ops, 105 &vdev_spare_ops, 106#ifdef _KERNEL 107 &vdev_geom_ops, --- 678 unchanged lines hidden (view full) --- 786 ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL); 787 788 mvd = vdev_alloc_common(spa, cvd->vdev_id, 0, ops); 789 790 mvd->vdev_asize = cvd->vdev_asize; 791 mvd->vdev_min_asize = cvd->vdev_min_asize; 792 mvd->vdev_max_asize = cvd->vdev_max_asize; 793 mvd->vdev_ashift = cvd->vdev_ashift; |
794 mvd->vdev_logical_ashift = cvd->vdev_logical_ashift; 795 mvd->vdev_physical_ashift = cvd->vdev_physical_ashift; |
|
749 mvd->vdev_state = cvd->vdev_state; 750 mvd->vdev_crtxg = cvd->vdev_crtxg; 751 752 vdev_remove_child(pvd, cvd); 753 vdev_add_child(pvd, mvd); 754 cvd->vdev_id = mvd->vdev_children; 755 vdev_add_child(mvd, cvd); 756 vdev_top_update(cvd->vdev_top, cvd->vdev_top); --- 15 unchanged lines hidden (view full) --- 772 773 ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); 774 775 ASSERT(mvd->vdev_children == 1); 776 ASSERT(mvd->vdev_ops == &vdev_mirror_ops || 777 mvd->vdev_ops == &vdev_replacing_ops || 778 mvd->vdev_ops == &vdev_spare_ops); 779 cvd->vdev_ashift = mvd->vdev_ashift; | 796 mvd->vdev_state = cvd->vdev_state; 797 mvd->vdev_crtxg = cvd->vdev_crtxg; 798 799 vdev_remove_child(pvd, cvd); 800 vdev_add_child(pvd, mvd); 801 cvd->vdev_id = mvd->vdev_children; 802 vdev_add_child(mvd, cvd); 803 vdev_top_update(cvd->vdev_top, cvd->vdev_top); --- 15 unchanged lines hidden (view full) --- 819 820 ASSERT(spa_config_held(cvd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); 821 822 ASSERT(mvd->vdev_children == 1); 823 ASSERT(mvd->vdev_ops == &vdev_mirror_ops || 824 mvd->vdev_ops == &vdev_replacing_ops || 825 mvd->vdev_ops == &vdev_spare_ops); 826 cvd->vdev_ashift = mvd->vdev_ashift; |
827 cvd->vdev_logical_ashift = mvd->vdev_logical_ashift; 828 cvd->vdev_physical_ashift = mvd->vdev_physical_ashift; |
|
780 781 vdev_remove_child(mvd, cvd); 782 vdev_remove_child(pvd, mvd); 783 784 /* 785 * If cvd will replace mvd as a top-level vdev, preserve mvd's guid. 786 * Otherwise, we could have detached an offline device, and when we 787 * go to import the pool we'll think we have two top-level vdevs, --- 327 unchanged lines hidden (view full) --- 1115int 1116vdev_open(vdev_t *vd) 1117{ 1118 spa_t *spa = vd->vdev_spa; 1119 int error; 1120 uint64_t osize = 0; 1121 uint64_t max_osize = 0; 1122 uint64_t asize, max_asize, psize; | 829 830 vdev_remove_child(mvd, cvd); 831 vdev_remove_child(pvd, mvd); 832 833 /* 834 * If cvd will replace mvd as a top-level vdev, preserve mvd's guid. 835 * Otherwise, we could have detached an offline device, and when we 836 * go to import the pool we'll think we have two top-level vdevs, --- 327 unchanged lines hidden (view full) --- 1164int 1165vdev_open(vdev_t *vd) 1166{ 1167 spa_t *spa = vd->vdev_spa; 1168 int error; 1169 uint64_t osize = 0; 1170 uint64_t max_osize = 0; 1171 uint64_t asize, max_asize, psize; |
1123 uint64_t ashift = 0; | 1172 uint64_t logical_ashift = 0; 1173 uint64_t physical_ashift = 0; |
1124 1125 ASSERT(vd->vdev_open_thread == curthread || 1126 spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); 1127 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || 1128 vd->vdev_state == VDEV_STATE_CANT_OPEN || 1129 vd->vdev_state == VDEV_STATE_OFFLINE); 1130 1131 vd->vdev_stat.vs_aux = VDEV_AUX_NONE; --- 13 unchanged lines hidden (view full) --- 1145 vd->vdev_label_aux); 1146 return (SET_ERROR(ENXIO)); 1147 } else if (vd->vdev_offline) { 1148 ASSERT(vd->vdev_children == 0); 1149 vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); 1150 return (SET_ERROR(ENXIO)); 1151 } 1152 | 1174 1175 ASSERT(vd->vdev_open_thread == curthread || 1176 spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL); 1177 ASSERT(vd->vdev_state == VDEV_STATE_CLOSED || 1178 vd->vdev_state == VDEV_STATE_CANT_OPEN || 1179 vd->vdev_state == VDEV_STATE_OFFLINE); 1180 1181 vd->vdev_stat.vs_aux = VDEV_AUX_NONE; --- 13 unchanged lines hidden (view full) --- 1195 vd->vdev_label_aux); 1196 return (SET_ERROR(ENXIO)); 1197 } else if (vd->vdev_offline) { 1198 ASSERT(vd->vdev_children == 0); 1199 vdev_set_state(vd, B_TRUE, VDEV_STATE_OFFLINE, VDEV_AUX_NONE); 1200 return (SET_ERROR(ENXIO)); 1201 } 1202 |
1153 error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift); | 1203 error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, 1204 &logical_ashift, &physical_ashift); |
1154 1155 /* 1156 * Reset the vdev_reopening flag so that we actually close 1157 * the vdev on error. 1158 */ 1159 vd->vdev_reopening = B_FALSE; 1160 if (zio_injection_enabled && error == 0) 1161 error = zio_handle_device_injection(vd, NULL, ENXIO); --- 81 unchanged lines hidden (view full) --- 1243 * Make sure the allocatable size hasn't shrunk. 1244 */ 1245 if (asize < vd->vdev_min_asize) { 1246 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1247 VDEV_AUX_BAD_LABEL); 1248 return (SET_ERROR(EINVAL)); 1249 } 1250 | 1205 1206 /* 1207 * Reset the vdev_reopening flag so that we actually close 1208 * the vdev on error. 1209 */ 1210 vd->vdev_reopening = B_FALSE; 1211 if (zio_injection_enabled && error == 0) 1212 error = zio_handle_device_injection(vd, NULL, ENXIO); --- 81 unchanged lines hidden (view full) --- 1294 * Make sure the allocatable size hasn't shrunk. 1295 */ 1296 if (asize < vd->vdev_min_asize) { 1297 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1298 VDEV_AUX_BAD_LABEL); 1299 return (SET_ERROR(EINVAL)); 1300 } 1301 |
1302 vd->vdev_physical_ashift = 1303 MAX(physical_ashift, vd->vdev_physical_ashift); 1304 vd->vdev_logical_ashift = MAX(logical_ashift, vd->vdev_logical_ashift); 1305 vd->vdev_ashift = MAX(vd->vdev_logical_ashift, vd->vdev_ashift); 1306 1307 if (vd->vdev_logical_ashift > SPA_MAXASHIFT) { 1308 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1309 VDEV_AUX_ASHIFT_TOO_BIG); 1310 return (EINVAL); 1311 } 1312 |
|
1251 if (vd->vdev_asize == 0) { 1252 /* 1253 * This is the first-ever open, so use the computed values. 1254 * For testing purposes, a higher ashift can be requested. 1255 */ 1256 vd->vdev_asize = asize; 1257 vd->vdev_max_asize = max_asize; | 1313 if (vd->vdev_asize == 0) { 1314 /* 1315 * This is the first-ever open, so use the computed values. 1316 * For testing purposes, a higher ashift can be requested. 1317 */ 1318 vd->vdev_asize = asize; 1319 vd->vdev_max_asize = max_asize; |
1258 vd->vdev_ashift = MAX(ashift, vd->vdev_ashift); | |
1259 } else { 1260 /* | 1320 } else { 1321 /* |
1261 * Detect if the alignment requirement has increased. 1262 * We don't want to make the pool unavailable, just 1263 * issue a warning instead. | 1322 * Make sure the alignment requirement hasn't increased. |
1264 */ | 1323 */ |
1265 if (ashift > vd->vdev_top->vdev_ashift && | 1324 if (vd->vdev_ashift > vd->vdev_top->vdev_ashift && |
1266 vd->vdev_ops->vdev_op_leaf) { | 1325 vd->vdev_ops->vdev_op_leaf) { |
1267 cmn_err(CE_WARN, 1268 "Disk, '%s', has a block alignment that is " 1269 "larger than the pool's alignment\n", 1270 vd->vdev_path); | 1326 vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, 1327 VDEV_AUX_BAD_LABEL); 1328 return (EINVAL); |
1271 } 1272 vd->vdev_max_asize = max_asize; 1273 } 1274 1275 /* 1276 * If all children are healthy and the asize has increased, 1277 * then we've experienced dynamic LUN growth. If automatic 1278 * expansion is enabled then use the additional space. --- 293 unchanged lines hidden (view full) --- 1572{ 1573 /* 1574 * Aim for roughly 200 metaslabs per vdev. 1575 */ 1576 vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); 1577 vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); 1578} 1579 | 1329 } 1330 vd->vdev_max_asize = max_asize; 1331 } 1332 1333 /* 1334 * If all children are healthy and the asize has increased, 1335 * then we've experienced dynamic LUN growth. If automatic 1336 * expansion is enabled then use the additional space. --- 293 unchanged lines hidden (view full) --- 1630{ 1631 /* 1632 * Aim for roughly 200 metaslabs per vdev. 1633 */ 1634 vd->vdev_ms_shift = highbit(vd->vdev_asize / 200); 1635 vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT); 1636} 1637 |
1638/* 1639 * Maximize performance by inflating the configured ashift for 1640 * top-level vdevs to be as close to the physical ashift as 1641 * possible without exceeding the administrator-specified 1642 * limit. 1643 */ |
|
1580void | 1644void |
1645vdev_ashift_optimize(vdev_t *vd) 1646{ 1647 if (vd == vd->vdev_top && 1648 (vd->vdev_ashift < vd->vdev_physical_ashift) && 1649 (vd->vdev_ashift < zfs_max_auto_ashift)) { 1650 vd->vdev_ashift = MIN(zfs_max_auto_ashift, 1651 vd->vdev_physical_ashift); 1652 } 1653} 1654 1655void |
|
1581vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) 1582{ 1583 ASSERT(vd == vd->vdev_top); 1584 ASSERT(!vd->vdev_ishole); 1585 ASSERT(ISP2(flags)); 1586 ASSERT(spa_writeable(vd->vdev_spa)); 1587 1588 if (flags & VDD_METASLAB) --- 1001 unchanged lines hidden (view full) --- 2590 mutex_enter(&vd->vdev_stat_lock); 2591 bcopy(&vd->vdev_stat, vs, sizeof (*vs)); 2592 vs->vs_timestamp = gethrtime() - vs->vs_timestamp; 2593 vs->vs_state = vd->vdev_state; 2594 vs->vs_rsize = vdev_get_min_asize(vd); 2595 if (vd->vdev_ops->vdev_op_leaf) 2596 vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; 2597 vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; | 1656vdev_dirty(vdev_t *vd, int flags, void *arg, uint64_t txg) 1657{ 1658 ASSERT(vd == vd->vdev_top); 1659 ASSERT(!vd->vdev_ishole); 1660 ASSERT(ISP2(flags)); 1661 ASSERT(spa_writeable(vd->vdev_spa)); 1662 1663 if (flags & VDD_METASLAB) --- 1001 unchanged lines hidden (view full) --- 2665 mutex_enter(&vd->vdev_stat_lock); 2666 bcopy(&vd->vdev_stat, vs, sizeof (*vs)); 2667 vs->vs_timestamp = gethrtime() - vs->vs_timestamp; 2668 vs->vs_state = vd->vdev_state; 2669 vs->vs_rsize = vdev_get_min_asize(vd); 2670 if (vd->vdev_ops->vdev_op_leaf) 2671 vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE; 2672 vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize; |
2673 vs->vs_configured_ashift = vd->vdev_top != NULL 2674 ? vd->vdev_top->vdev_ashift : vd->vdev_ashift; 2675 vs->vs_logical_ashift = vd->vdev_logical_ashift; 2676 vs->vs_physical_ashift = vd->vdev_physical_ashift; |
|
2598 mutex_exit(&vd->vdev_stat_lock); 2599 2600 /* 2601 * If we're getting stats on the root vdev, aggregate the I/O counts 2602 * over all top-level vdevs (i.e. the direct children of the root). 2603 */ 2604 if (vd == rvd) { 2605 for (int c = 0; c < rvd->vdev_children; c++) { --- 714 unchanged lines hidden --- | 2677 mutex_exit(&vd->vdev_stat_lock); 2678 2679 /* 2680 * If we're getting stats on the root vdev, aggregate the I/O counts 2681 * over all top-level vdevs (i.e. the direct children of the root). 2682 */ 2683 if (vd == rvd) { 2684 for (int c = 0; c < rvd->vdev_children; c++) { --- 714 unchanged lines hidden --- |