Side-by-side diff of bptree.c — left column: old revision 263397; right column: new revision 268650 (columns separated by "|"; file line numbers are fused into the text; "unchanged lines hidden" marks collapsed context).
---|---|
1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 88 unchanged lines hidden (view full) --- 97 ASSERT0(bt->bt_bytes); 98 ASSERT0(bt->bt_comp); 99 ASSERT0(bt->bt_uncomp); 100 dmu_buf_rele(db, FTAG); 101 102 return (dmu_object_free(os, obj, tx)); 103} 104 | 1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE --- 88 unchanged lines hidden (view full) --- 97 ASSERT0(bt->bt_bytes); 98 ASSERT0(bt->bt_comp); 99 ASSERT0(bt->bt_uncomp); 100 dmu_buf_rele(db, FTAG); 101 102 return (dmu_object_free(os, obj, tx)); 103} 104 |
105boolean_t 106bptree_is_empty(objset_t *os, uint64_t obj) 107{ 108 dmu_buf_t *db; 109 bptree_phys_t *bt; 110 boolean_t rv; 111 112 VERIFY0(dmu_bonus_hold(os, obj, FTAG, &db)); 113 bt = db->db_data; 114 rv = (bt->bt_begin == bt->bt_end); 115 dmu_buf_rele(db, FTAG); 116 return (rv); 117} 118 |
|
/*
 * NOTE(review): diff cell covering bptree_add() in both revisions (old rev
 * 263397 left of each '|', new rev 268650 right).  bptree_add() queues one
 * (blkptr, birth_txg) entry at index bt_end of the bptree object and bumps
 * the byte/comp/uncomp accounting in the bonus-buffer header; sync context
 * only (see the embedded comment), so no locking is needed.
 */
105void 106bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, 107 uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx) 108{ 109 dmu_buf_t *db; 110 bptree_phys_t *bt; | 119void 120bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg, 121 uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx) 122{ 123 dmu_buf_t *db; 124 bptree_phys_t *bt; |
/*
 * Only code change in this function: rev 268650 zero-initializes the whole
 * entry at declaration ("bte = { 0 }") ...
 */
111 bptree_entry_phys_t bte; | 125 bptree_entry_phys_t bte = { 0 }; |
112 113 /* 114 * bptree objects are in the pool mos, therefore they can only be 115 * modified in syncing context. Furthermore, this is only modified 116 * by the sync thread, so no locking is necessary. 117 */ 118 ASSERT(dmu_tx_is_syncing(tx)); 119 120 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 121 bt = db->db_data; 122 123 bte.be_birth_txg = birth_txg; 124 bte.be_bp = *bp; | 126 127 /* 128 * bptree objects are in the pool mos, therefore they can only be 129 * modified in syncing context. Furthermore, this is only modified 130 * by the sync thread, so no locking is necessary. 131 */ 132 ASSERT(dmu_tx_is_syncing(tx)); 133 134 VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db)); 135 bt = db->db_data; 136 137 bte.be_birth_txg = birth_txg; 138 bte.be_bp = *bp; |
/*
 * ... which makes this explicit bzero() of the be_zb bookmark redundant;
 * the new revision deletes it (empty right-hand column).
 */
125 bzero(&bte.be_zb, sizeof (bte.be_zb)); | |
/*
 * Remainder of bptree_add() is unchanged between revisions.  After the
 * "14 unchanged lines hidden" marker below, the cell shows only the TAIL of
 * bptree_visit_cb() (its start is inside the hidden region, so that
 * function is incomplete from this view): on a successful visit of a
 * to-be-freed bp it subtracts the bp's dsize/psize/ucsize from the header
 * accounting.
 */
126 dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx); 127 128 dmu_buf_will_dirty(db, tx); 129 bt->bt_end++; 130 bt->bt_bytes += bytes; 131 bt->bt_comp += comp; 132 bt->bt_uncomp += uncomp; 133 dmu_buf_rele(db, FTAG); --- 14 unchanged lines hidden (view full) --- 148 if (err == 0 && ba->ba_free) { 149 ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp); 150 ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp); 151 ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp); 152 } 153 return (err); 154} | 139 dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx); 140 141 dmu_buf_will_dirty(db, tx); 142 bt->bt_end++; 143 bt->bt_bytes += bytes; 144 bt->bt_comp += comp; 145 bt->bt_uncomp += uncomp; 146 dmu_buf_rele(db, FTAG); --- 14 unchanged lines hidden (view full) --- 161 if (err == 0 && ba->ba_free) { 162 ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp); 163 ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp); 164 ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp); 165 } 166 return (err); 167} |
169/* 170 * If "free" is set: 171 * - It is assumed that "func" will be freeing the block pointers. 172 * - If "func" returns nonzero, the bookmark will be remembered and 173 * iteration will be restarted from this point on next invocation. 174 * - If an i/o error is encountered (e.g. "func" returns EIO or ECKSUM), 175 * bptree_iterate will remember the bookmark, continue traversing 176 * any additional entries, and return 0. 177 * 178 * If "free" is not set, traversal will stop and return an error if 179 * an i/o error is encountered. 180 * 181 * In either case, if zfs_free_leak_on_eio is set, i/o errors will be 182 * ignored and traversal will continue (i.e. TRAVERSE_HARD will be passed to 183 * traverse_dataset_destroyed()). 184 */ |
|
/*
 * NOTE(review): diff cell covering bptree_iterate() in both revisions.
 * Summary of the rev 263397 -> 268650 delta visible in this cell:
 *   - the TRAVERSE_HARD gate changes from the generic zfs_recover tunable to
 *     the purpose-specific zfs_free_leak_on_eio;
 *   - a new ioerr flag lets a free-traversal survive i/o errors: EIO,
 *     ECKSUM and ENXIO from traverse_dataset_destroyed() now save the
 *     resume bookmark, set ioerr, and move on to the next entry, replacing
 *     the old zfs_panic_recover() path;
 *   - entries that finish AFTER a prior i/o error cannot advance bt_begin,
 *     so they are neutralized on disk via be_birth_txg = UINT64_MAX;
 *   - bt_begin is advanced / the entry freed only when !ioerr;
 *   - a zfs_dbgmsg() progress line is added per entry;
 *   - the final accounting ASSERTs tolerate ioerr, and under
 *     zfs_free_leak_on_eio the leaked bt_bytes/bt_comp/bt_uncomp counters
 *     are zeroed before being asserted.
 * The old revision's "ASSERT(!free || i == ba.ba_phys->bt_begin)" is dropped.
 * CAUTION: 9 lines of the function body are hidden below (error handling
 * after dmu_bonus_hold and part of the ba setup) — do not reconstruct this
 * function from this view alone.
 */
156int 157bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, 158 void *arg, dmu_tx_t *tx) 159{ | 185int 186bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, 187 void *arg, dmu_tx_t *tx) 188{ |
/* New in 268650: tracks whether any entry hit a skippable i/o error. */
189 boolean_t ioerr = B_FALSE; |
|
160 int err; 161 uint64_t i; 162 dmu_buf_t *db; 163 struct bptree_args ba; 164 165 ASSERT(!free || dmu_tx_is_syncing(tx)); 166 167 err = dmu_bonus_hold(os, obj, FTAG, &db); --- 9 unchanged lines hidden (view full) --- 177 ba.ba_arg = arg; 178 ba.ba_tx = tx; 179 180 err = 0; 181 for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { 182 bptree_entry_phys_t bte; 183 int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; 184 | 190 int err; 191 uint64_t i; 192 dmu_buf_t *db; 193 struct bptree_args ba; 194 195 ASSERT(!free || dmu_tx_is_syncing(tx)); 196 197 err = dmu_bonus_hold(os, obj, FTAG, &db); --- 9 unchanged lines hidden (view full) --- 207 ba.ba_arg = arg; 208 ba.ba_tx = tx; 209 210 err = 0; 211 for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) { 212 bptree_entry_phys_t bte; 213 int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST; 214 |
/* Old-revision-only assert removed: after an ioerr, i can run past bt_begin. */
185 ASSERT(!free || i == ba.ba_phys->bt_begin); 186 | |
187 err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), 188 &bte, DMU_READ_NO_PREFETCH); 189 if (err != 0) 190 break; 191 | 215 err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte), 216 &bte, DMU_READ_NO_PREFETCH); 217 if (err != 0) 218 break; 219 |
/* TRAVERSE_HARD gate retargeted from zfs_recover to zfs_free_leak_on_eio. */
192 if (zfs_recover) | 220 if (zfs_free_leak_on_eio) |
193 flags |= TRAVERSE_HARD; | 221 flags |= TRAVERSE_HARD; |
/* New per-entry debug trace of the resume point. */
222 zfs_dbgmsg("bptree index %d: traversing from min_txg=%lld " 223 "bookmark %lld/%lld/%lld/%lld", 224 i, (longlong_t)bte.be_birth_txg, 225 (longlong_t)bte.be_zb.zb_objset, 226 (longlong_t)bte.be_zb.zb_object, 227 (longlong_t)bte.be_zb.zb_level, 228 (longlong_t)bte.be_zb.zb_blkid); |
|
194 err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, 195 bte.be_birth_txg, &bte.be_zb, flags, 196 bptree_visit_cb, &ba); 197 if (free) { | 229 err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, 230 bte.be_birth_txg, &bte.be_zb, flags, 231 bptree_visit_cb, &ba); 232 if (free) { |
/* Old code handled only ERESTART; new code saves the bookmark for ANY err. */
198 if (err == ERESTART) { | 233 /* 234 * The callback has freed the visited block pointers. 235 * Record our traversal progress on disk, either by 236 * updating this record's bookmark, or by logically 237 * removing this record by advancing bt_begin. 238 */ 239 if (err != 0) { |
199 /* save bookmark for future resume */ 200 ASSERT3U(bte.be_zb.zb_objset, ==, 201 ZB_DESTROYED_OBJSET); 202 ASSERT0(bte.be_zb.zb_level); 203 dmu_write(os, obj, i * sizeof (bte), 204 sizeof (bte), &bte, tx); | 240 /* save bookmark for future resume */ 241 ASSERT3U(bte.be_zb.zb_objset, ==, 242 ZB_DESTROYED_OBJSET); 243 ASSERT0(bte.be_zb.zb_level); 244 dmu_write(os, obj, i * sizeof (bte), 245 sizeof (bte), &bte, tx); |
/* New: EIO/ECKSUM/ENXIO are absorbed (err=0, ioerr set) instead of breaking. */
205 break; 206 } 207 if (err != 0) { | 246 if (err == EIO || err == ECKSUM || 247 err == ENXIO) { 248 /* 249 * Skip the rest of this tree and 250 * continue on to the next entry. 251 */ 252 err = 0; 253 ioerr = B_TRUE; 254 } else { 255 break; 256 } 257 } else if (ioerr) { |
208 /* | 258 /* |
/* Old comment (panic rationale) replaced by the UINT64_MAX no-op rationale. */
209 * We can not properly handle an i/o 210 * error, because the traversal code 211 * does not know how to resume from an 212 * arbitrary bookmark. | 259 * This entry is finished, but there were 260 * i/o errors on previous entries, so we 261 * can't adjust bt_begin. Set this entry's 262 * be_birth_txg such that it will be 263 * treated as a no-op in future traversals. |
213 */ | 264 */ |
/* Old: zfs_panic_recover on error.  New: neutralize the finished entry. */
214 zfs_panic_recover("error %u from " 215 "traverse_dataset_destroyed()", err); | 265 bte.be_birth_txg = UINT64_MAX; 266 dmu_write(os, obj, i * sizeof (bte), 267 sizeof (bte), &bte, tx); |
216 } 217 | 268 } 269 |
/* New: only advance bt_begin / free the entry if no i/o error has occurred. */
218 ba.ba_phys->bt_begin++; 219 (void) dmu_free_range(os, obj, 220 i * sizeof (bte), sizeof (bte), tx); | 270 if (!ioerr) { 271 ba.ba_phys->bt_begin++; 272 (void) dmu_free_range(os, obj, 273 i * sizeof (bte), sizeof (bte), tx); 274 } 275 } else if (err != 0) { 276 break; |
221 } 222 } 223 | 277 } 278 } 279 |
/* Assert relaxed in 268650: an ioerr run legitimately leaves bt_begin behind. */
224 ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end); | 280 ASSERT(!free || err != 0 || ioerr || 281 ba.ba_phys->bt_begin == ba.ba_phys->bt_end); |
225 226 /* if all blocks are free there should be no used space */ 227 if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) { | 282 283 /* if all blocks are free there should be no used space */ 284 if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) { |
/* New: under zfs_free_leak_on_eio, deliberately leak the unfreed space. */
285 if (zfs_free_leak_on_eio) { 286 ba.ba_phys->bt_bytes = 0; 287 ba.ba_phys->bt_comp = 0; 288 ba.ba_phys->bt_uncomp = 0; 289 } 290 |
|
228 ASSERT0(ba.ba_phys->bt_bytes); 229 ASSERT0(ba.ba_phys->bt_comp); 230 ASSERT0(ba.ba_phys->bt_uncomp); 231 } 232 233 dmu_buf_rele(db, FTAG); 234 235 return (err); 236} | 291 ASSERT0(ba.ba_phys->bt_bytes); 292 ASSERT0(ba.ba_phys->bt_comp); 293 ASSERT0(ba.ba_phys->bt_uncomp); 294 } 295 296 dmu_buf_rele(db, FTAG); 297 298 return (err); 299} |